From 13ed9438fb47d62663fb1ef367baac1a567b25b3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 4 Feb 2020 11:23:12 -0400
Subject: [PATCH 01/24] Common: Implement a basic SpinLock class

---
 src/common/CMakeLists.txt |  2 ++
 src/common/spin_lock.cpp  | 46 +++++++++++++++++++++++++++++++++++++++
 src/common/spin_lock.h    | 20 +++++++++++++++++
 3 files changed, 68 insertions(+)
 create mode 100644 src/common/spin_lock.cpp
 create mode 100644 src/common/spin_lock.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 0a3e2f4d1..c8bf80372 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -143,6 +143,8 @@ add_library(common STATIC
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
+    spin_lock.cpp
+    spin_lock.h
     string_util.cpp
     string_util.h
     swap.h
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 000000000..8077b78d2
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/spin_lock.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#if _M_AMD64
+#define __x86_64__ 1
+#endif
+#if _M_ARM64
+#define __aarch64__ 1
+#endif
+#else
+#if __x86_64__
+#include <xmmintrin.h>
+#endif
+#endif
+
+namespace {
+
+void thread_pause() {
+#if __x86_64__
+    _mm_pause();
+#elif __aarch64__ && _MSC_VER
+    __yield();
+#elif __aarch64__
+    asm("yield");
+#endif
+}
+
+} // namespace
+
+namespace Common {
+
+void SpinLock::lock() {
+    while (lck.test_and_set(std::memory_order_acquire))
+        thread_pause();
+}
+
+void SpinLock::unlock() {
+    lck.clear(std::memory_order_release);
+}
+
+} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 000000000..cbc67b6c8
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,20 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+
+namespace Common {
+
+class SpinLock {
+public:
+    void lock();
+    void unlock();
+
+private:
+    std::atomic_flag lck = ATOMIC_FLAG_INIT;
+};
+
+} // namespace Common

From bc266a9d98f38f6fd1006f1ca52bd57e6a7f37d3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 4 Feb 2020 15:06:23 -0400
Subject: [PATCH 02/24] Common: Implement a basic Fiber class.

---
 src/common/CMakeLists.txt |   2 +
 src/common/fiber.cpp      | 147 ++++++++++++++++++++++++++++++++++++++
 src/common/fiber.h        |  55 ++++++++++++++
 3 files changed, 204 insertions(+)
 create mode 100644 src/common/fiber.cpp
 create mode 100644 src/common/fiber.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c8bf80372..554d6e253 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -110,6 +110,8 @@ add_library(common STATIC
     common_types.h
     dynamic_library.cpp
     dynamic_library.h
+    fiber.cpp
+    fiber.h
     file_util.cpp
     file_util.h
     hash.h
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 000000000..eb59f1aa9
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,147 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/fiber.h"
+
+namespace Common {
+
+#ifdef _MSC_VER
+#include <windows.h>
+
+struct Fiber::FiberImpl {
+    LPVOID handle = nullptr;
+};
+
+void Fiber::_start([[maybe_unused]] void* parameter) {
+    guard.lock();
+    if (previous_fiber) {
+        previous_fiber->guard.unlock();
+        previous_fiber = nullptr;
+    }
+    entry_point(start_parameter);
+}
+
+static void __stdcall FiberStartFunc(LPVOID lpFiberParameter)
+{
+   auto fiber = static_cast<Fiber *>(lpFiberParameter);
+   fiber->_start(nullptr);
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+    impl->handle = CreateFiber(0, &FiberStartFunc, this);
+}
+
+Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+}
+
+Fiber::~Fiber() {
+    // Make sure the Fiber is not being used
+    guard.lock();
+    guard.unlock();
+    DeleteFiber(impl->handle);
+}
+
+void Fiber::Exit() {
+    if (!is_thread_fiber) {
+        return;
+    }
+    ConvertFiberToThread();
+    guard.unlock();
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    to->guard.lock();
+    to->previous_fiber = from;
+    SwitchToFiber(to->impl->handle);
+    auto previous_fiber = from->previous_fiber;
+    if (previous_fiber) {
+        previous_fiber->guard.unlock();
+        previous_fiber.reset();
+    }
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
+    fiber->impl->handle = ConvertThreadToFiber(NULL);
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#else
+
+#include <boost/context/detail/fcontext.hpp>
+
+constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
+
+struct Fiber::FiberImpl {
+    boost::context::detail::fcontext_t context;
+    std::array<u8, default_stack_size> stack;
+};
+
+void Fiber::_start(void* parameter) {
+    guard.lock();
+    boost::context::detail::transfer_t* transfer = static_cast<boost::context::detail::transfer_t*>(parameter);
+    if (previous_fiber) {
+        previous_fiber->impl->context = transfer->fctx;
+        previous_fiber->guard.unlock();
+        previous_fiber = nullptr;
+    }
+    entry_point(start_parameter);
+}
+
+static void FiberStartFunc(boost::context::detail::transfer_t transfer)
+{
+   auto fiber = static_cast<Fiber *>(transfer.data);
+   fiber->_start(&transfer);
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+    auto start_func = std::bind(&Fiber::start, this);
+    impl->context =
+        boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func);
+}
+
+Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+}
+
+Fiber::~Fiber() {
+    // Make sure the Fiber is not being used
+    guard.lock();
+    guard.unlock();
+}
+
+void Fiber::Exit() {
+    if (!is_thread_fiber) {
+        return;
+    }
+    guard.unlock();
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    to->guard.lock();
+    to->previous_fiber = from;
+    auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
+    auto previous_fiber = from->previous_fiber;
+    if (previous_fiber) {
+        previous_fiber->impl->context = transfer.fctx;
+        previous_fiber->guard.unlock();
+        previous_fiber.reset();
+    }
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#endif
+} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 000000000..ab44905cf
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,55 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "common/common_types.h"
+#include "common/spin_lock.h"
+
+namespace Common {
+
+class Fiber {
+public:
+    Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
+    ~Fiber();
+
+    Fiber(const Fiber&) = delete;
+    Fiber& operator=(const Fiber&) = delete;
+
+    Fiber(Fiber&&) = default;
+    Fiber& operator=(Fiber&&) = default;
+
+    /// Yields control from Fiber 'from' to Fiber 'to'
+    /// Fiber 'from' must be the currently running fiber.
+    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
+    static std::shared_ptr<Fiber> ThreadToFiber();
+
+    /// Only call from main thread's fiber
+    void Exit();
+
+    /// Used internally but required to be public, Shall not be used
+    void _start(void* parameter);
+
+    /// Changes the start parameter of the fiber. Has no effect if the fiber already started
+    void SetStartParameter(void* new_parameter) {
+        start_parameter = new_parameter;
+    }
+
+private:
+    Fiber();
+
+    struct FiberImpl;
+
+    SpinLock guard;
+    std::function<void(void*)> entry_point;
+    void* start_parameter;
+    std::shared_ptr<Fiber> previous_fiber;
+    std::unique_ptr<FiberImpl> impl;
+    bool is_thread_fiber{};
+};
+
+} // namespace Common

From 8d0e3c542258cc50081af93aa85e0e3cbf8900c3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 14:13:16 -0400
Subject: [PATCH 03/24] Tests: Add tests for fibers and refactor/fix Fiber
 class

---
 src/common/fiber.cpp        |  32 +++---
 src/common/fiber.h          |  19 +++-
 src/tests/CMakeLists.txt    |   1 +
 src/tests/common/fibers.cpp | 214 ++++++++++++++++++++++++++++++++++++
 4 files changed, 247 insertions(+), 19 deletions(-)
 create mode 100644 src/tests/common/fibers.cpp

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index eb59f1aa9..a2c0401c4 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -3,18 +3,21 @@
 // Refer to the license.txt file included.
 
 #include "common/fiber.h"
+#ifdef _MSC_VER
+#include <windows.h>
+#else
+#include <boost/context/detail/fcontext.hpp>
+#endif
 
 namespace Common {
 
 #ifdef _MSC_VER
-#include <windows.h>
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
 };
 
-void Fiber::_start([[maybe_unused]] void* parameter) {
-    guard.lock();
+void Fiber::start() {
     if (previous_fiber) {
         previous_fiber->guard.unlock();
         previous_fiber = nullptr;
@@ -22,10 +25,10 @@ void Fiber::_start([[maybe_unused]] void* parameter) {
     entry_point(start_parameter);
 }
 
-static void __stdcall FiberStartFunc(LPVOID lpFiberParameter)
+void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
 {
-   auto fiber = static_cast<Fiber *>(lpFiberParameter);
-   fiber->_start(nullptr);
+   auto fiber = static_cast<Fiber *>(fiber_parameter);
+   fiber->start();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -74,30 +77,26 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 
 #else
 
-#include <boost/context/detail/fcontext.hpp>
-
 constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
 
-struct Fiber::FiberImpl {
-    boost::context::detail::fcontext_t context;
+struct alignas(64) Fiber::FiberImpl {
     std::array<u8, default_stack_size> stack;
+    boost::context::detail::fcontext_t context;
 };
 
-void Fiber::_start(void* parameter) {
-    guard.lock();
-    boost::context::detail::transfer_t* transfer = static_cast<boost::context::detail::transfer_t*>(parameter);
+void Fiber::start(boost::context::detail::transfer_t& transfer) {
     if (previous_fiber) {
-        previous_fiber->impl->context = transfer->fctx;
+        previous_fiber->impl->context = transfer.fctx;
         previous_fiber->guard.unlock();
         previous_fiber = nullptr;
     }
     entry_point(start_parameter);
 }
 
-static void FiberStartFunc(boost::context::detail::transfer_t transfer)
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
 {
    auto fiber = static_cast<Fiber *>(transfer.data);
-   fiber->_start(&transfer);
+   fiber->start(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -139,6 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
     std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
     fiber->is_thread_fiber = true;
     return fiber;
 }
diff --git a/src/common/fiber.h b/src/common/fiber.h
index ab44905cf..812d6644a 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -10,6 +10,12 @@
 #include "common/common_types.h"
 #include "common/spin_lock.h"
 
+#ifndef _MSC_VER
+namespace boost::context::detail {
+    struct transfer_t;
+}
+#endif
+
 namespace Common {
 
 class Fiber {
@@ -31,9 +37,6 @@ public:
     /// Only call from main thread's fiber
     void Exit();
 
-    /// Used internally but required to be public, Shall not be used
-    void _start(void* parameter);
-
     /// Changes the start parameter of the fiber. Has no effect if the fiber already started
     void SetStartParameter(void* new_parameter) {
         start_parameter = new_parameter;
@@ -42,6 +45,16 @@ public:
 private:
     Fiber();
 
+#ifdef _MSC_VER
+    void start();
+    static void FiberStartFunc(void* fiber_parameter);
+#else
+    void start(boost::context::detail::transfer_t& transfer);
+    static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+#endif
+
+
+
     struct FiberImpl;
 
     SpinLock guard;
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index c7038b217..47ef30aa9 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_executable(tests
     common/bit_field.cpp
     common/bit_utils.cpp
+    common/fibers.cpp
     common/multi_level_queue.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
new file mode 100644
index 000000000..ff840afa6
--- /dev/null
+++ b/src/tests/common/fibers.cpp
@@ -0,0 +1,214 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/fiber.h"
+#include "common/spin_lock.h"
+
+namespace Common {
+
+class TestControl1 {
+public:
+    TestControl1() = default;
+
+    void DoWork();
+
+    void ExecuteThread(u32 id);
+
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
+    std::vector<u32> items;
+    std::vector<u32> results;
+};
+
+static void WorkControl1(void* control) {
+    TestControl1* test_control = static_cast<TestControl1*>(control);
+    test_control->DoWork();
+}
+
+void TestControl1::DoWork() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    u32 value = items[id];
+    for (u32 i = 0; i < id; i++) {
+        value++;
+    }
+    results[id] = value;
+    Fiber::YieldTo(work_fibers[id], thread_fibers[id]);
+}
+
+void TestControl1::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+    work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
+    items[id] = rand() % 256;
+    Fiber::YieldTo(thread_fibers[id], work_fibers[id]);
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart1(u32 id, TestControl1& test_control) {
+    test_control.ExecuteThread(id);
+}
+
+
+TEST_CASE("Fibers::Setup", "[common]") {
+    constexpr u32 num_threads = 7;
+    TestControl1 test_control{};
+    test_control.thread_fibers.resize(num_threads, nullptr);
+    test_control.work_fibers.resize(num_threads, nullptr);
+    test_control.items.resize(num_threads, 0);
+    test_control.results.resize(num_threads, 0);
+    std::vector<std::thread> threads;
+    for (u32 i = 0; i < num_threads; i++) {
+        threads.emplace_back(ThreadStart1, i, std::ref(test_control));
+    }
+    for (u32 i = 0; i < num_threads; i++) {
+        threads[i].join();
+    }
+    for (u32 i = 0; i < num_threads; i++) {
+        REQUIRE(test_control.items[i] + i == test_control.results[i]);
+    }
+}
+
+class TestControl2 {
+public:
+    TestControl2() = default;
+
+    void DoWork1() {
+        trap2 = false;
+        while (trap.load());
+        for (u32 i = 0; i < 12000; i++) {
+            value1 += i;
+        }
+        Fiber::YieldTo(fiber1, fiber3);
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        assert1 = id == 1;
+        value2 += 5000;
+        Fiber::YieldTo(fiber1, thread_fibers[id]);
+    }
+
+    void DoWork2() {
+        while (trap2.load());
+        value2 = 2000;
+        trap = false;
+        Fiber::YieldTo(fiber2, fiber1);
+        assert3 = false;
+    }
+
+    void DoWork3() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        assert2 = id == 0;
+        value1 += 1000;
+        Fiber::YieldTo(fiber3, thread_fibers[id]);
+    }
+
+    void ExecuteThread(u32 id);
+
+    void CallFiber1() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber1);
+    }
+
+    void CallFiber2() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber2);
+    }
+
+    void Exit();
+
+    bool assert1{};
+    bool assert2{};
+    bool assert3{true};
+    u32 value1{};
+    u32 value2{};
+    std::atomic<bool> trap{true};
+    std::atomic<bool> trap2{true};
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::shared_ptr<Common::Fiber> fiber1;
+    std::shared_ptr<Common::Fiber> fiber2;
+    std::shared_ptr<Common::Fiber> fiber3;
+};
+
+static void WorkControl2_1(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork1();
+}
+
+static void WorkControl2_2(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork2();
+}
+
+static void WorkControl2_3(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork3();
+}
+
+void TestControl2::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+}
+
+void TestControl2::Exit() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart2_1(u32 id, TestControl2& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber1();
+    test_control.Exit();
+}
+
+static void ThreadStart2_2(u32 id, TestControl2& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber2();
+    test_control.Exit();
+}
+
+TEST_CASE("Fibers::InterExchange", "[common]") {
+    TestControl2 test_control{};
+    test_control.thread_fibers.resize(2, nullptr);
+    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
+    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
+    test_control.fiber3 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
+    std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
+    std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
+    thread1.join();
+    thread2.join();
+    REQUIRE(test_control.assert1);
+    REQUIRE(test_control.assert2);
+    REQUIRE(test_control.assert3);
+    REQUIRE(test_control.value2 == 7000);
+    u32 cal_value = 0;
+    for (u32 i = 0; i < 12000; i++) {
+        cal_value += i;
+    }
+    cal_value += 1000;
+    REQUIRE(test_control.value1 == cal_value);
+}
+
+
+} // namespace Common

From be320a9e10fda32a984b12cdfe3aaf09cc67b39a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 15:48:20 -0400
Subject: [PATCH 04/24] Common: Polish Fiber class, add comments, asserts and
 more tests.

---
 src/common/fiber.cpp        | 55 ++++++++++++---------
 src/common/fiber.h          | 14 +++++-
 src/common/spin_lock.cpp    |  7 +++
 src/common/spin_lock.h      |  1 +
 src/tests/common/fibers.cpp | 95 ++++++++++++++++++++++++++++++++++++-
 5 files changed, 147 insertions(+), 25 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a2c0401c4..a88a30ced 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/fiber.h"
 #ifdef _MSC_VER
 #include <windows.h>
@@ -18,11 +19,11 @@ struct Fiber::FiberImpl {
 };
 
 void Fiber::start() {
-    if (previous_fiber) {
-        previous_fiber->guard.unlock();
-        previous_fiber = nullptr;
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
     entry_point(start_parameter);
+    UNREACHABLE();
 }
 
 void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
@@ -43,12 +44,16 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
 
 Fiber::~Fiber() {
     // Make sure the Fiber is not being used
-    guard.lock();
-    guard.unlock();
+    bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
     DeleteFiber(impl->handle);
 }
 
 void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
@@ -57,14 +62,15 @@ void Fiber::Exit() {
 }
 
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     SwitchToFiber(to->impl->handle);
     auto previous_fiber = from->previous_fiber;
-    if (previous_fiber) {
-        previous_fiber->guard.unlock();
-        previous_fiber.reset();
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
@@ -85,12 +91,12 @@ struct alignas(64) Fiber::FiberImpl {
 };
 
 void Fiber::start(boost::context::detail::transfer_t& transfer) {
-    if (previous_fiber) {
-        previous_fiber->impl->context = transfer.fctx;
-        previous_fiber->guard.unlock();
-        previous_fiber = nullptr;
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->impl->context = transfer.fctx;
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
     entry_point(start_parameter);
+    UNREACHABLE();
 }
 
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
@@ -113,11 +119,15 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
 
 Fiber::~Fiber() {
     // Make sure the Fiber is not being used
-    guard.lock();
-    guard.unlock();
+    bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
 }
 
 void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
@@ -125,15 +135,16 @@ void Fiber::Exit() {
 }
 
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
     auto previous_fiber = from->previous_fiber;
-    if (previous_fiber) {
-        previous_fiber->impl->context = transfer.fctx;
-        previous_fiber->guard.unlock();
-        previous_fiber.reset();
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->impl->context = transfer.fctx;
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 812d6644a..89a01fdd8 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -18,6 +18,18 @@ namespace boost::context::detail {
 
 namespace Common {
 
+/**
+ * Fiber class
+ * A fiber is a userspace thread with its own context. They can be used to
+ * implement coroutines, emulated threading systems and certain asynchronous
+ * patterns.
+ *
+ * This class implements fibers at a low level, thus allowing greater freedom
+ * to implement such patterns. This fiber class is 'threadsafe': only one fiber
+ * can be running at a time, and threads will be locked while trying to yield to
+ * a running fiber until it yields. WARNING: exchanging two running fibers between
+ * threads will cause a deadlock.
+ */
 class Fiber {
 public:
     Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
@@ -53,8 +65,6 @@ private:
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
 #endif
 
-
-
     struct FiberImpl;
 
     SpinLock guard;
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
index 8077b78d2..82a1d39ff 100644
--- a/src/common/spin_lock.cpp
+++ b/src/common/spin_lock.cpp
@@ -43,4 +43,11 @@ void SpinLock::unlock() {
     lck.clear(std::memory_order_release);
 }
 
+bool SpinLock::try_lock() {
+    if (lck.test_and_set(std::memory_order_acquire)) {
+        return false;
+    }
+    return true;
+}
+
 } // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
index cbc67b6c8..70282a961 100644
--- a/src/common/spin_lock.h
+++ b/src/common/spin_lock.h
@@ -12,6 +12,7 @@ class SpinLock {
 public:
     void lock();
     void unlock();
+    bool try_lock();
 
 private:
     std::atomic_flag lck = ATOMIC_FLAG_INIT;
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index ff840afa6..358393a19 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -64,7 +64,9 @@ static void ThreadStart1(u32 id, TestControl1& test_control) {
     test_control.ExecuteThread(id);
 }
 
-
+/** This test checks for fiber setup configuration and validates that fibers are
+ *  doing all the work required.
+ */
 TEST_CASE("Fibers::Setup", "[common]") {
     constexpr u32 num_threads = 7;
     TestControl1 test_control{};
@@ -188,6 +190,10 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
     test_control.Exit();
 }
 
+/** This test checks for fiber thread exchange configuration and validates
+ *  that a fiber has been successfully transferred from one thread to another and that the TLS
+ *  region of the thread is kept while changing fibers.
+ */
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
@@ -210,5 +216,92 @@ TEST_CASE("Fibers::InterExchange", "[common]") {
     REQUIRE(test_control.value1 == cal_value);
 }
 
+class TestControl3 {
+public:
+    TestControl3() = default;
+
+    void DoWork1() {
+        value1 += 1;
+        Fiber::YieldTo(fiber1, fiber2);
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        value3 += 1;
+        Fiber::YieldTo(fiber1, thread_fibers[id]);
+    }
+
+    void DoWork2() {
+        value2 += 1;
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(fiber2, thread_fibers[id]);
+    }
+
+    void ExecuteThread(u32 id);
+
+    void CallFiber1() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber1);
+    }
+
+    void Exit();
+
+    u32 value1{};
+    u32 value2{};
+    u32 value3{};
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::shared_ptr<Common::Fiber> fiber1;
+    std::shared_ptr<Common::Fiber> fiber2;
+};
+
+static void WorkControl3_1(void* control) {
+    TestControl3* test_control = static_cast<TestControl3*>(control);
+    test_control->DoWork1();
+}
+
+static void WorkControl3_2(void* control) {
+    TestControl3* test_control = static_cast<TestControl3*>(control);
+    test_control->DoWork2();
+}
+
+void TestControl3::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+}
+
+void TestControl3::Exit() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart3(u32 id, TestControl3& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber1();
+    test_control.Exit();
+}
+
+/** This test checks for two threads racing to start the same fiber.
+ *  It checks that execution occurred in an ordered manner and that at no point
+ *  were there two contexts at the same time.
+ */
+TEST_CASE("Fibers::StartRace", "[common]") {
+    TestControl3 test_control{};
+    test_control.thread_fibers.resize(2, nullptr);
+    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
+    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
+    std::thread thread1(ThreadStart3, 0, std::ref(test_control));
+    std::thread thread2(ThreadStart3, 1, std::ref(test_control));
+    thread1.join();
+    thread2.join();
+    REQUIRE(test_control.value1 == 1);
+    REQUIRE(test_control.value2 == 1);
+    REQUIRE(test_control.value3 == 1);
+}
+
+
 
 } // namespace Common

From 62e35ffc0effddfacb73ebc766735148436d7331 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 19:12:27 -0400
Subject: [PATCH 05/24] Core: Implement a Host Timer.

---
 src/core/CMakeLists.txt       |   2 +
 src/core/core_timing_util.cpp |   5 ++
 src/core/core_timing_util.h   |   1 +
 src/core/host_timing.cpp      | 161 ++++++++++++++++++++++++++++++++++
 src/core/host_timing.h        | 126 ++++++++++++++++++++++++++
 5 files changed, 295 insertions(+)
 create mode 100644 src/core/host_timing.cpp
 create mode 100644 src/core/host_timing.h

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 47418006b..c0d068376 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -547,6 +547,8 @@ add_library(core STATIC
     hle/service/vi/vi_u.h
     hle/service/wlan/wlan.cpp
     hle/service/wlan/wlan.h
+    host_timing.cpp
+    host_timing.h
     loader/deconstructed_rom_directory.cpp
     loader/deconstructed_rom_directory.h
     loader/elf.cpp
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index de50d3b14..f42666b4d 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -49,6 +49,11 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
     return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }
 
+/// Converts a duration in nanoseconds into ticks of a clock running at Hardware::CNTFREQ.
+/// The multiplication is carried out in 128 bits before dividing by one second in nanoseconds.
+u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+    // Qualified like the neighbouring helpers, which all spell the constants as Hardware::...
+    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temporal, 1000000000).first;
+}
+
 u64 CpuCyclesToClockCycles(u64 ticks) {
     const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
     return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index addc72b19..65fb7368b 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,6 +13,7 @@ namespace Core::Timing {
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);
+u64 nsToClockCycles(std::chrono::nanoseconds ns);
 
 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
     return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
new file mode 100644
index 000000000..c02f571c6
--- /dev/null
+++ b/src/core/host_timing.cpp
@@ -0,0 +1,161 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/host_timing.h"
+
+#include <algorithm>
+#include <mutex>
+#include <string>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/thread.h"
+#include "core/core_timing_util.h"
+
+namespace Core::HostTiming {
+
+/// Builds a shared EventType, moving the given callback and name into it.
+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
+    auto event_type = std::make_shared<EventType>(std::move(callback), std::move(name));
+    return event_type;
+}
+
+/// One scheduled occurrence of an EventType, stored in CoreTiming's event heap.
+struct CoreTiming::Event {
+    /// Due time; the timer thread compares it against the nanosecond value in global_timer.
+    u64 time;
+    /// Value of event_fifo_id when the event was queued; breaks ties between equal times.
+    u64 fifo_order;
+    /// Opaque value forwarded to the callback.
+    u64 userdata;
+    /// Non-owning link to the event type; the callback is skipped when it has expired.
+    std::weak_ptr<EventType> type;
+
+    // Events are ordered by due time first and, for equal times, by the order in which
+    // they were added to the queue.
+    friend bool operator<(const Event& left, const Event& right) {
+        if (left.time != right.time) {
+            return left.time < right.time;
+        }
+        return left.fifo_order < right.fifo_order;
+    }
+
+    friend bool operator>(const Event& left, const Event& right) {
+        return right < left;
+    }
+};
+
+CoreTiming::CoreTiming() = default;
+CoreTiming::~CoreTiming() = default;
+
+/// Entry point handed to the timer thread by Initialize(): runs the event loop of instance.
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+    instance.Advance();
+}
+
+/// Resets the FIFO counter, records the start time and launches the timer thread.
+void CoreTiming::Initialize() {
+    event_fifo_id = 0;
+
+    // Placeholder event type whose callback does nothing.
+    ev_lost = CreateEvent("_lost_event", [](u64, s64) {});
+
+    // GetGlobalTimeNs()/GetGlobalTimeUs() report time relative to this instant.
+    start_time = std::chrono::system_clock::now();
+    timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+}
+
+/// Asks the timer thread to stop, waits for it to finish and discards all queued events.
+void CoreTiming::Shutdown() {
+    {
+        // Publish the shutdown request and wake the timer thread while holding the lock.
+        // The lock is released by the guard alone: unlocking inner_mutex by hand while a
+        // guard still owns it would unlock the mutex twice.
+        std::lock_guard guard{inner_mutex};
+        shutting_down = true;
+        if (!is_set) {
+            is_set = true;
+            condvar.notify_one();
+        }
+    }
+    // The mutex must be free here, the timer thread needs it to observe the request.
+    timer_thread->join();
+    ClearPendingEvents();
+}
+
+/// Queues event_type to fire ns_into_future nanoseconds from now and signals the timer thread.
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
+    std::lock_guard guard{inner_mutex};
+
+    // Turn the relative delay into an absolute time on the GetGlobalTimeNs() scale.
+    const auto now_ns = GetGlobalTimeNs().count();
+    const u64 timeout = static_cast<u64>(now_ns + ns_into_future);
+
+    // Append the event, then restore the min-heap ordering of the queue.
+    event_queue.push_back(Event{timeout, event_fifo_id++, userdata, event_type});
+    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+
+    // Signal the timer thread unless a wake-up is already pending.
+    if (is_set) {
+        return;
+    }
+    is_set = true;
+    condvar.notify_one();
+}
+
+/// Removes every queued event that matches both event_type and userdata.
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
+    std::lock_guard guard{inner_mutex};
+
+    const auto is_target = [&](const Event& e) {
+        return e.type.lock().get() == event_type.get() && e.userdata == userdata;
+    };
+    const auto new_end = std::remove_if(event_queue.begin(), event_queue.end(), is_target);
+    if (new_end == event_queue.end()) {
+        // Nothing matched, so the queue was left as it was.
+        return;
+    }
+
+    // Removing random items breaks the heap invariant so we have to re-establish it.
+    event_queue.erase(new_end, event_queue.end());
+    std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+}
+
+/// Returns the time elapsed since start_time, converted with Core::Timing::nsToCycles.
+u64 CoreTiming::GetCPUTicks() const {
+    return Core::Timing::nsToCycles(GetGlobalTimeNs());
+}
+
+/// Returns the time elapsed since start_time, converted with Core::Timing::nsToClockCycles.
+u64 CoreTiming::GetClockTicks() const {
+    return Core::Timing::nsToClockCycles(GetGlobalTimeNs());
+}
+
+/// Drops every queued event. Note: this function does not take inner_mutex itself.
+void CoreTiming::ClearPendingEvents() {
+    event_queue.clear();
+}
+
+/// Removes every queued event of the given type, whatever its userdata.
+void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
+    std::lock_guard guard{inner_mutex};
+
+    const auto has_type = [&](const Event& e) {
+        return e.type.lock().get() == event_type.get();
+    };
+    const auto new_end = std::remove_if(event_queue.begin(), event_queue.end(), has_type);
+    if (new_end == event_queue.end()) {
+        // Nothing matched, so the queue was left as it was.
+        return;
+    }
+
+    // Removing random items breaks the heap invariant so we have to re-establish it.
+    event_queue.erase(new_end, event_queue.end());
+    std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+}
+
+/// Event loop of the timer thread. Each round it runs the callbacks of all events that are
+/// due, then sleeps until the next event is due or until it is signalled through
+/// is_set/condvar (new event scheduled, or shutdown requested). Returns once shutting_down
+/// is observed after a wake-up.
+void CoreTiming::Advance() {
+    const auto wake_requested = [this] { return is_set; };
+
+    while (true) {
+        std::unique_lock<std::mutex> guard(inner_mutex);
+
+        global_timer = GetGlobalTimeNs().count();
+
+        while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+            Event evt = std::move(event_queue.front());
+            std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+            event_queue.pop_back();
+
+            // Run the callback without the lock so it may call back into CoreTiming.
+            // The lock is dropped through the guard so its ownership state stays accurate.
+            guard.unlock();
+
+            if (auto event_type{evt.type.lock()}) {
+                event_type->callback(evt.userdata, global_timer - evt.time);
+            }
+
+            guard.lock();
+        }
+
+        if (event_queue.empty()) {
+            // Nothing is scheduled: there is no deadline to compute (front() on an empty
+            // vector is undefined), so sleep until somebody signals us.
+            condvar.wait(guard, wake_requested);
+        } else {
+            // The loop above only stops on an event that is still in the future.
+            const auto next_time =
+                std::chrono::nanoseconds(event_queue.front().time - global_timer);
+            condvar.wait_for(guard, next_time, wake_requested);
+        }
+        is_set = false;
+        if (shutting_down) {
+            break;
+        }
+    }
+}
+
+/// Returns the system_clock time elapsed since start_time, in nanoseconds.
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+    const auto elapsed = std::chrono::system_clock::now() - start_time;
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
+}
+
+/// Returns the system_clock time elapsed since start_time, in microseconds.
+std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
+    const auto elapsed = std::chrono::system_clock::now() - start_time;
+    return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
+}
+
+} // namespace Core::HostTiming
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
new file mode 100644
index 000000000..a3a32e087
--- /dev/null
+++ b/src/core/host_timing.h
@@ -0,0 +1,126 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
+
+namespace Core::HostTiming {
+
+/// A callback that may be scheduled for a particular core timing event.
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
+using sys_time_point = std::chrono::time_point<std::chrono::system_clock>;
+
+/// Contains the characteristics of a particular event.
+struct EventType {
+    /// Moves the given callback and name into the new event type.
+    EventType(TimedCallback&& callback, std::string&& name)
+        : callback{std::move(callback)}, name{std::move(name)} {}
+
+    /// The event's callback function.
+    TimedCallback callback;
+    /// The name of the event, stored by value.
+    const std::string name;
+};
+
+/**
+ * This is a system to schedule events into the future. Time is measured in nanoseconds
+ * of host time elapsed since Initialize() was called.
+ *
+ * To schedule an event, you first have to create its type with CreateEvent(). This is where
+ * you pass in the callback. You then schedule events using the EventType you get back.
+ *
+ * The second argument that the callbacks get is how many nanoseconds late the event fired.
+ * So to schedule a new event on a regular basis:
+ * inside callback:
+ *   ScheduleEvent(period_in_ns - ns_late, event_type, userdata)
+ */
+class CoreTiming {
+public:
+    CoreTiming();
+    ~CoreTiming();
+
+    CoreTiming(const CoreTiming&) = delete;
+    CoreTiming(CoreTiming&&) = delete;
+
+    CoreTiming& operator=(const CoreTiming&) = delete;
+    CoreTiming& operator=(CoreTiming&&) = delete;
+
+    /// Resets the event counter, records the start time and launches the timer thread that
+    /// runs the callbacks of scheduled events.
+    void Initialize();
+
+    /// Tears down all timing related functionality.
+    void Shutdown();
+
+    /// Schedules an event to fire ns_into_future nanoseconds from now.
+    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                       u64 userdata = 0);
+
+    /// Removes every queued event that matches both the event type and the userdata.
+    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
+
+    /// Removes every queued event of the given type, whatever its userdata.
+    void RemoveEvent(const std::shared_ptr<EventType>& event_type);
+
+    /// Returns current time in emulated CPU cycles
+    u64 GetCPUTicks() const;
+
+    /// Returns current time in emulated clock cycles
+    u64 GetClockTicks() const;
+
+    /// Returns the time elapsed since Initialize() in microseconds.
+    std::chrono::microseconds GetGlobalTimeUs() const;
+
+    /// Returns the time elapsed since Initialize() in nanoseconds.
+    std::chrono::nanoseconds GetGlobalTimeNs() const;
+
+private:
+    struct Event;
+
+    /// Clear all pending events. This should ONLY be done on exit.
+    void ClearPendingEvents();
+
+    /// Function run by timer_thread; it simply calls Advance() on the given instance.
+    static void ThreadEntry(CoreTiming& instance);
+    /// Event loop of the timer thread.
+    void Advance();
+
+    /// Host time captured by Initialize(); the GetGlobalTime*() values are relative to it.
+    sys_time_point start_time;
+
+    /// Elapsed nanoseconds as sampled by the timer thread at the start of each loop round.
+    u64 global_timer = 0;
+
+    // NOTE(review): start_point is not referenced by host_timing.cpp - confirm it is needed.
+    std::chrono::nanoseconds start_point;
+
+    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
+    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
+    // accommodated by the standard adaptor class.
+    std::vector<Event> event_queue;
+    /// Counter handed out as Event::fifo_order; incremented for every scheduled event.
+    u64 event_fifo_id = 0;
+
+    /// Event type with an empty callback, created by Initialize().
+    std::shared_ptr<EventType> ev_lost;
+    /// Wake-up flag for the timer thread; it is the predicate of the waits on condvar.
+    bool is_set = false;
+    std::condition_variable condvar;
+    /// Taken by the scheduling functions and by the timer thread around queue accesses.
+    std::mutex inner_mutex;
+    std::unique_ptr<std::thread> timer_thread;
+    /// Set by Shutdown() to make the timer thread leave its loop.
+    std::atomic<bool> shutting_down{};
+};
+
+/// Creates a core timing event with the given name and callback.
+///
+/// @param name     The name of the core timing event to create.
+/// @param callback The callback to execute for the event.
+///
+/// @returns An EventType instance representing the created event.
+///
+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
+
+} // namespace Core::HostTiming

From 0f8e5a146563d1f245f8f62cb931dc1e0b55de2f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 8 Feb 2020 12:48:57 -0400
Subject: [PATCH 06/24] Tests: Add base tests to host timing

---
 src/common/thread.h            |   4 +-
 src/core/host_timing.cpp       |  97 +++++++++++++--------
 src/core/host_timing.h         |  30 ++++++-
 src/tests/CMakeLists.txt       |   1 +
 src/tests/core/host_timing.cpp | 150 +++++++++++++++++++++++++++++++++
 5 files changed, 241 insertions(+), 41 deletions(-)
 create mode 100644 src/tests/core/host_timing.cpp

diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc071685..127cc7e23 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
+#include "common/common_types.h"
 
 namespace Common {
 
@@ -28,8 +29,7 @@ public:
         is_set = false;
     }
 
-    template <class Duration>
-    bool WaitFor(const std::chrono::duration<Duration>& time) {
+    bool WaitFor(const std::chrono::nanoseconds& time) {
         std::unique_lock lk{mutex};
         if (!condvar.wait_for(lk, time, [this] { return is_set; }))
             return false;
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index c02f571c6..d9514b2c5 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -10,7 +10,6 @@
 #include <tuple>
 
 #include "common/assert.h"
-#include "common/thread.h"
 #include "core/core_timing_util.h"
 
 namespace Core::HostTiming {
@@ -47,39 +46,55 @@ void CoreTiming::Initialize() {
     event_fifo_id = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    start_time = std::chrono::system_clock::now();
+    start_time = std::chrono::steady_clock::now();
     timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
 }
 
 void CoreTiming::Shutdown() {
-    std::unique_lock<std::mutex> guard(inner_mutex);
+    paused = true;
     shutting_down = true;
-    if (!is_set) {
-        is_set = true;
-        condvar.notify_one();
-    }
-    inner_mutex.unlock();
+    event.Set();
     timer_thread->join();
     ClearPendingEvents();
+    timer_thread.reset();
+    has_started = false;
+}
+
+/// Stores the requested pause state. It neither signals the timer thread nor waits for it.
+void CoreTiming::Pause(bool is_paused) {
+    paused = is_paused;
+}
+
+/// Requests the given pause state and spins until the timer thread has acknowledged it.
+/// Returns immediately when the request and the acknowledged state already match it.
+void CoreTiming::SyncPause(bool is_paused) {
+    if (is_paused != paused || paused_set != paused) {
+        Pause(is_paused);
+        // Signal the timer thread so that it gets to re-check the pause request.
+        event.Set();
+        // Busy-wait until the timer thread publishes the new state in paused_set.
+        while (paused_set != is_paused) {
+        }
+    }
+}
+
+/// Returns true while the timer thread has not flagged itself as paused (paused_set is false).
+bool CoreTiming::IsRunning() {
+    return !paused_set;
+}
+
+/// Returns false only when the timer thread has flagged that it is waiting without a
+/// deadline (wait_set) and the event queue is empty; returns true otherwise.
+bool CoreTiming::HasPendingEvents() {
+    if (!wait_set) {
+        return true;
+    }
+    // event_queue is modified under basic_lock by the scheduling functions and by the timer
+    // thread, so it has to be inspected under the same lock.
+    std::lock_guard guard{basic_lock};
+    return !event_queue.empty();
+}
 
 void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                                u64 userdata) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
     const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
 
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
 
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    if (!is_set) {
-        is_set = true;
-        condvar.notify_one();
-    }
+    basic_lock.unlock();
+    event.Set();
 }
 
 void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-
+    basic_lock.lock();
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.userdata == userdata;
     });
@@ -89,6 +104,7 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
 u64 CoreTiming::GetCPUTicks() const {
@@ -106,7 +122,7 @@ void CoreTiming::ClearPendingEvents() {
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -117,43 +133,54 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
 void CoreTiming::Advance() {
-    while (true) {
-        std::unique_lock<std::mutex> guard(inner_mutex);
+    has_started = true;
+    while (!shutting_down) {
+        while (!paused) {
+            paused_set = false;
+            basic_lock.lock();
+            global_timer = GetGlobalTimeNs().count();
 
-        global_timer = GetGlobalTimeNs().count();
+            while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+                Event evt = std::move(event_queue.front());
+                std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+                event_queue.pop_back();
+                basic_lock.unlock();
 
-        while (!event_queue.empty() && event_queue.front().time <= global_timer) {
-            Event evt = std::move(event_queue.front());
-            std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-            event_queue.pop_back();
-            inner_mutex.unlock();
+                if (auto event_type{evt.type.lock()}) {
+                    event_type->callback(evt.userdata, global_timer - evt.time);
+                }
 
-            if (auto event_type{evt.type.lock()}) {
-                event_type->callback(evt.userdata, global_timer - evt.time);
+                basic_lock.lock();
             }
 
-            inner_mutex.lock();
-        }
-        auto next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
-        condvar.wait_for(guard, next_time, [this] { return is_set; });
-        is_set = false;
-        if (shutting_down) {
-            break;
+            if (!event_queue.empty()) {
+                std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
+                basic_lock.unlock();
+                event.WaitFor(next_time);
+            } else {
+                basic_lock.unlock();
+                wait_set = true;
+                event.Wait();
+            }
+
+            wait_set = false;
         }
+        paused_set = true;
     }
 }
 
 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    sys_time_point current = std::chrono::system_clock::now();
+    sys_time_point current = std::chrono::steady_clock::now();
     auto elapsed = current - start_time;
     return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    sys_time_point current = std::chrono::system_clock::now();
+    sys_time_point current = std::chrono::steady_clock::now();
     auto elapsed = current - start_time;
     return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
 }
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index a3a32e087..1d053a7fa 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -14,13 +14,15 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/spin_lock.h"
+#include "common/thread.h"
 #include "common/threadsafe_queue.h"
 
 namespace Core::HostTiming {
 
 /// A callback that may be scheduled for a particular core timing event.
 using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
-using sys_time_point = std::chrono::time_point<std::chrono::system_clock>;
+using sys_time_point = std::chrono::time_point<std::chrono::steady_clock>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
@@ -63,6 +65,23 @@ public:
     /// Tears down all timing related functionality.
     void Shutdown();
 
+    /// Pauses/Unpauses the execution of the timer thread.
+    void Pause(bool is_paused);
+
+    /// Pauses/Unpauses the execution of the timer thread and waits until paused.
+    void SyncPause(bool is_paused);
+
+    /// Checks if core timing is running.
+    bool IsRunning();
+
+    /// Checks if the timer thread has started.
+    bool HasStarted() {
+        return has_started;
+    }
+
+    /// Checks if there are any pending time events.
+    bool HasPendingEvents();
+
     /// Schedules an event in core timing
     void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                        u64 userdata = 0);
@@ -107,11 +126,14 @@ private:
     u64 event_fifo_id = 0;
 
     std::shared_ptr<EventType> ev_lost;
-    bool is_set = false;
-    std::condition_variable condvar;
-    std::mutex inner_mutex;
+    Common::Event event{};
+    Common::SpinLock basic_lock{};
     std::unique_ptr<std::thread> timer_thread;
+    std::atomic<bool> paused{};
+    std::atomic<bool> paused_set{};
+    std::atomic<bool> wait_set{};
     std::atomic<bool> shutting_down{};
+    std::atomic<bool> has_started{};
 };
 
 /// Creates a core timing event with the given name and callback.
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 47ef30aa9..3f750b51c 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
+    core/host_timing.cpp
     tests.cpp
 )
 
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
new file mode 100644
index 000000000..ca9c8e50a
--- /dev/null
+++ b/src/tests/core/host_timing.cpp
@@ -0,0 +1,150 @@
+// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+
+#include <array>
+#include <bitset>
+#include <cstdlib>
+#include <memory>
+#include <string>
+
+#include "common/file_util.h"
+#include "core/core.h"
+#include "core/host_timing.h"
+
+// Numbers are chosen randomly to make sure the correct one is given.
+static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+static constexpr std::array<u64, 5> calls_order{{2,0,1,4,3}};
+static std::array<s64, 5> delays{};
+
+static std::bitset<CB_IDS.size()> callbacks_ran_flags;
+static u64 expected_callback = 0;
+static s64 lateness = 0;
+
+/// Timer callback for slot IDX: records that it ran, checks that it received its own id and
+/// that it fired in the position given by calls_order, then stores how late it was called.
+template <unsigned int IDX>
+void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
+    static_assert(IDX < CB_IDS.size(), "IDX out of range");
+    callbacks_ran_flags.set(IDX);
+    REQUIRE(CB_IDS[IDX] == userdata);
+    // expected_callback counts the callbacks that ran since the test reset it to 0.
+    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
+    delays[IDX] = nanoseconds_late;
+    ++expected_callback;
+}
+
+static u64 callbacks_done = 0;
+
+/// Owns a CoreTiming that is initialized on construction and shut down on destruction.
+struct ScopeInit final {
+    Core::HostTiming::CoreTiming core_timing;
+
+    ScopeInit() {
+        core_timing.Initialize();
+    }
+
+    ~ScopeInit() {
+        core_timing.Shutdown();
+    }
+};
+
+/// Schedules five events while the timer is paused, checks that none of them ran, then
+/// resumes the timer and checks that all of them ran (ordering is verified in the callbacks).
+TEST_CASE("HostTiming[BasicOrder]", "[core]") {
+    ScopeInit guard;
+    auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
+
+    // The bookkeeping globals are shared by all test cases of this file. Reset both so the
+    // checks below do not depend on which test case ran before this one.
+    expected_callback = 0;
+    callbacks_ran_flags.reset();
+
+    core_timing.SyncPause(true);
+
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    /// test pause
+    REQUIRE(callbacks_ran_flags.none());
+
+    core_timing.Pause(false); // No need to sync
+
+    // Spin until the timer thread has drained the queue.
+    while (core_timing.HasPendingEvents()) {
+    }
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+    }
+}
+
+#pragma optimize("", off)
+/// Returns how many nanoseconds, measured with core_timing's own clock, it takes to call
+/// GetGlobalTimeNs() 1000 times in a row.
+u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
+    const u64 begin_ns = core_timing.GetGlobalTimeNs().count();
+    // Sum of the readings; presumably only there so that the calls are not dropped.
+    u64 placebo = 0;
+    for (std::size_t iteration = 0; iteration < 1000; ++iteration) {
+        placebo += core_timing.GetGlobalTimeNs().count();
+    }
+    const u64 end_ns = core_timing.GetGlobalTimeNs().count();
+    return end_ns - begin_ns;
+}
+#pragma optimize("", on)
+
+/// Schedules five events while the timer is running, waits for the queue to drain and checks
+/// that all of them ran (ordering is verified in the callbacks). Also prints how long the
+/// scheduling calls and 1000 clock reads took.
+TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
+    ScopeInit guard;
+    auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
+
+    core_timing.SyncPause(true);
+    core_timing.SyncPause(false);
+
+    // The bookkeeping globals are shared by all test cases of this file. Without clearing
+    // the flags, the all() check below would already hold from an earlier test case.
+    expected_callback = 0;
+    callbacks_ran_flags.reset();
+
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    const double scheduling_time = static_cast<double>(end - start);
+    const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
+
+    // Spin until the timer thread has drained the queue.
+    while (core_timing.HasPendingEvents()) {
+    }
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+    }
+
+    const double micro = scheduling_time / 1000.0f;
+    const double mili = micro / 1000.0f;
+    printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
+    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f);
+}

From 234b5ff6a999d7d69cdcdf214e0c3984cdab11cf Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 9 Feb 2020 16:53:22 -0400
Subject: [PATCH 07/24] Common: Implement WallClock Interface and implement a
 native clock for x64

---
 src/common/CMakeLists.txt       |   4 +
 src/common/wall_clock.cpp       |  90 ++++++++++++++++++++++
 src/common/wall_clock.h         |  40 ++++++++++
 src/common/x64/cpu_detect.cpp   |  33 ++++++++
 src/common/x64/cpu_detect.h     |  12 +++
 src/common/x64/native_clock.cpp | 128 ++++++++++++++++++++++++++++++++
 src/common/x64/native_clock.h   |  41 ++++++++++
 src/core/host_timing.cpp        |  21 +++---
 src/core/host_timing.h          |   4 +-
 src/tests/core/host_timing.cpp  |  45 +++++------
 10 files changed, 378 insertions(+), 40 deletions(-)
 create mode 100644 src/common/wall_clock.cpp
 create mode 100644 src/common/wall_clock.h
 create mode 100644 src/common/x64/native_clock.cpp
 create mode 100644 src/common/x64/native_clock.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 554d6e253..aacea0ab7 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -167,6 +167,8 @@ add_library(common STATIC
     vector_math.h
     virtual_buffer.cpp
     virtual_buffer.h
+    wall_clock.cpp
+    wall_clock.h
     web_result.h
     zstd_compression.cpp
     zstd_compression.h
@@ -177,6 +179,8 @@ if(ARCHITECTURE_x86_64)
         PRIVATE
             x64/cpu_detect.cpp
             x64/cpu_detect.h
+            x64/native_clock.cpp
+            x64/native_clock.h
             x64/xbyak_abi.h
             x64/xbyak_util.h
     )
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 000000000..eabbba9da
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,90 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/uint128.h"
+#include "common/wall_clock.h"
+
+#ifdef ARCHITECTURE_x86_64
+#include "common/x64/cpu_detect.h"
+#include "common/x64/native_clock.h"
+#endif
+
+namespace Common {
+
+using base_timer = std::chrono::steady_clock;
+using base_time_point = std::chrono::time_point<base_timer>;
+
+/// WallClock implementation based on std::chrono::steady_clock. It reports itself as
+/// non-native and measures time from the moment the object is constructed.
+class StandardWallClock : public WallClock {
+public:
+    StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
+        : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false),
+          start_time{base_timer::now()} {}
+
+    std::chrono::nanoseconds GetTimeNS() override {
+        return ElapsedAs<std::chrono::nanoseconds>();
+    }
+
+    std::chrono::microseconds GetTimeUS() override {
+        return ElapsedAs<std::chrono::microseconds>();
+    }
+
+    std::chrono::milliseconds GetTimeMS() override {
+        return ElapsedAs<std::chrono::milliseconds>();
+    }
+
+    u64 GetClockCycles() override {
+        return ElapsedTicksAt(emulated_clock_frequency);
+    }
+
+    u64 GetCPUCycles() override {
+        return ElapsedTicksAt(emulated_cpu_frequency);
+    }
+
+private:
+    /// Time elapsed since construction, converted to the requested duration type.
+    template <typename Duration>
+    Duration ElapsedAs() const {
+        const base_time_point current = base_timer::now();
+        return std::chrono::duration_cast<Duration>(current - start_time);
+    }
+
+    /// Elapsed nanoseconds multiplied by frequency (in 128 bits) and divided by 1000000000.
+    u64 ElapsedTicksAt(u64 frequency) {
+        const std::chrono::nanoseconds time_now = GetTimeNS();
+        const u128 temporal = Common::Multiply64Into128(time_now.count(), frequency);
+        return Common::Divide128On32(temporal, 1000000000).first;
+    }
+
+    base_time_point start_time;
+};
+
+#ifdef ARCHITECTURE_x86_64
+
+/// Creates the clock to use on this host: an X64::NativeClock when the CPU reports an
+/// invariant TSC and a non-zero TSC frequency could be obtained, a StandardWallClock
+/// otherwise. The object is allocated with new and is not freed by this function.
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    const auto& caps = GetCPUCaps();
+    u64 rtsc_frequency = 0;
+    if (caps.invariant_tsc) {
+        // The reported base frequency is multiplied by 1000000 to obtain the TSC frequency.
+        if (caps.base_frequency != 0) {
+            rtsc_frequency = static_cast<u64>(caps.base_frequency) * 1000000U;
+        }
+        // No usable value from the CPU capabilities: fall back to an estimate.
+        if (rtsc_frequency == 0) {
+            rtsc_frequency = EstimateRDTSCFrequency();
+        }
+    }
+
+    if (rtsc_frequency != 0) {
+        return new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency,
+                                    rtsc_frequency);
+    }
+    return new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency);
+}
+
+#else
+
+/// Creates the clock to use on this host. Without the x86_64 native clock this is always a
+/// StandardWallClock. The object is allocated with new and is not freed by this function.
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    WallClock* const clock =
+        new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency);
+    return clock;
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 000000000..6f763d74b
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,40 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+/// Interface of a clock that reports time as durations and as cycle counts derived from
+/// the emulated CPU and clock frequencies it was constructed with.
+class WallClock {
+public:
+    /// Implementations are created by CreateBestMatchingClock() and handed out as WallClock
+    /// pointers, so destroying one through that pointer requires a virtual destructor.
+    virtual ~WallClock() = default;
+
+    /// Returns the clock's current time in nanoseconds.
+    virtual std::chrono::nanoseconds GetTimeNS() = 0;
+    /// Returns the clock's current time in microseconds.
+    virtual std::chrono::microseconds GetTimeUS() = 0;
+    /// Returns the clock's current time in milliseconds.
+    virtual std::chrono::milliseconds GetTimeMS() = 0;
+    /// Returns the clock's current time in cycles of the emulated clock frequency.
+    virtual u64 GetClockCycles() = 0;
+    /// Returns the clock's current time in cycles of the emulated CPU frequency.
+    virtual u64 GetCPUCycles() = 0;
+
+    /// Tells if the wall clock, uses the host CPU's hardware clock
+    bool IsNative() const {
+        return is_native;
+    }
+
+protected:
+    /// Stores both frequencies and the native flag for use by the implementation.
+    WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
+        : emulated_cpu_frequency{emulated_cpu_frequency},
+          emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
+
+    u64 emulated_cpu_frequency;
+    u64 emulated_clock_frequency;
+
+private:
+    bool is_native;
+};
+
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+
+} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..d767c544c 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
     std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
     std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
     std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
+    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
+        caps.manufacturer = Manufacturer::Intel;
+    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
+        caps.manufacturer = Manufacturer::AMD;
+    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
+        caps.manufacturer = Manufacturer::Hygon;
+    else
+        caps.manufacturer = Manufacturer::Unknown;
+
+    u32 family = {};
+    u32 model = {};
 
     __cpuid(cpu_id, 0x80000000);
 
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
     // Detect family and other miscellaneous features
     if (max_std_fn >= 1) {
         __cpuid(cpu_id, 0x00000001);
+        family = (cpu_id[0] >> 8) & 0xf;
+        model = (cpu_id[0] >> 4) & 0xf;
+        if (family == 0xf) {
+            family += (cpu_id[0] >> 20) & 0xff;
+        }
+        if (family >= 6) {
+            model += ((cpu_id[0] >> 16) & 0xf) << 4;
+        }
 
         if ((cpu_id[3] >> 25) & 1)
             caps.sse = true;
@@ -130,6 +149,20 @@ static CPUCaps Detect() {
             caps.fma4 = true;
     }
 
+    if (max_ex_fn >= 0x80000007) {
+        __cpuid(cpu_id, 0x80000007);
+        if (cpu_id[3] & (1 << 8)) {
+            caps.invariant_tsc = true;
+        }
+    }
+
+    if (max_std_fn >= 0x16) {
+        __cpuid(cpu_id, 0x16);
+        caps.base_frequency = cpu_id[0];
+        caps.max_frequency = cpu_id[1];
+        caps.bus_frequency = cpu_id[2];
+    }
+
     return caps;
 }
 
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..f0676fa5e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
 
 namespace Common {
 
+enum class Manufacturer : u32 {
+    Intel = 0,
+    AMD = 1,
+    Hygon = 2,
+    Unknown = 3,
+};
+
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
+    Manufacturer manufacturer;
     char cpu_string[0x21];
     char brand_string[0x41];
     bool sse;
@@ -24,6 +32,10 @@ struct CPUCaps {
     bool fma;
     bool fma4;
     bool aes;
+    bool invariant_tsc;
+    u32 base_frequency;
+    u32 max_frequency;
+    u32 bus_frequency;
 };
 
 /**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..c799111fd
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,128 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <thread>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#include "common/x64/native_clock.h"
+
+namespace Common {
+
+#ifdef _MSC_VER
+
+namespace {
+
+struct uint128 {
+    u64 low;
+    u64 high;
+};
+
+u64 umuldiv64(u64 a, u64 b, u64 d) {
+    uint128 r{};
+    r.low = _umul128(a, b, &r.high);
+    u64 remainder;
+    return _udiv128(r.high, r.low, d, &remainder);
+}
+
+} // namespace
+
+#else
+
+namespace {
+
+u64 umuldiv64(u64 a, u64 b, u64 d) {
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+}
+
+} // namespace
+
+#endif
+
+u64 EstimateRDTSCFrequency() {
+    const auto milli_10 = std::chrono::milliseconds{10};
+    // get current time
+    _mm_mfence();
+    const u64 tscStart = __rdtsc();
+    const auto startTime = std::chrono::steady_clock::now();
+    // wait roughly 3 seconds; steady_clock is monotonic, so host time changes cannot skew this
+    while (true) {
+        auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
+            std::chrono::steady_clock::now() - startTime);
+        if (milli.count() >= 3000)
+            break;
+        std::this_thread::sleep_for(milli_10);
+    }
+    const auto endTime = std::chrono::steady_clock::now();
+    _mm_mfence();
+    const u64 tscEnd = __rdtsc();
+    // elapsed ticks * 1e9 / elapsed nanoseconds = TSC ticks per second
+    const u64 timer_diff =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
+    const u64 tsc_diff = tscEnd - tscStart;
+    const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff);
+    return tsc_freq;
+}
+
+namespace X64 {
+NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
+                         u64 rtsc_frequency)
+    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
+                                                                             rtsc_frequency} {
+    _mm_mfence();
+    last_measure = __rdtsc();
+    accumulated_ticks = 0U;
+}
+
+u64 NativeClock::GetRTSC() {
+    rtsc_serialize.lock();
+    _mm_mfence();
+    const u64 current_measure = __rdtsc();
+    u64 diff = current_measure - last_measure;
+    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+    if (current_measure > last_measure) {
+        last_measure = current_measure;
+    }
+    const u64 ticks = (accumulated_ticks += diff); // snapshot under the lock; a later read races
+    rtsc_serialize.unlock();
+    return ticks;
+}
+
+std::chrono::nanoseconds NativeClock::GetTimeNS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)};
+}
+
+std::chrono::microseconds NativeClock::GetTimeUS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)};
+}
+
+std::chrono::milliseconds NativeClock::GetTimeMS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)};
+}
+
+u64 NativeClock::GetClockCycles() {
+    const u64 rtsc_value = GetRTSC();
+    return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+}
+
+u64 NativeClock::GetCPUCycles() {
+    const u64 rtsc_value = GetRTSC();
+    return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+}
+
+} // namespace X64
+
+} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..b58cf9f5a
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/spin_lock.h"
+#include "common/wall_clock.h"
+
+namespace Common {
+
+namespace X64 {
+class NativeClock : public WallClock {
+public:
+    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
+
+    std::chrono::nanoseconds GetTimeNS() override;
+
+    std::chrono::microseconds GetTimeUS() override;
+
+    std::chrono::milliseconds GetTimeMS() override;
+
+    u64 GetClockCycles() override;
+
+    u64 GetCPUCycles() override;
+
+private:
+    u64 GetRTSC();
+
+    SpinLock rtsc_serialize{};
+    u64 last_measure{};
+    u64 accumulated_ticks{};
+    u64 rtsc_frequency;
+};
+} // namespace X64
+
+u64 EstimateRDTSCFrequency();
+
+} // namespace Common
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index d9514b2c5..ef9977b76 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -35,7 +35,11 @@ struct CoreTiming::Event {
     }
 };
 
-CoreTiming::CoreTiming() = default;
+CoreTiming::CoreTiming() {
+    Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
+    clock = std::unique_ptr<Common::WallClock>(wall);
+}
+
 CoreTiming::~CoreTiming() = default;
 
 void CoreTiming::ThreadEntry(CoreTiming& instance) {
@@ -46,7 +50,6 @@ void CoreTiming::Initialize() {
     event_fifo_id = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    start_time = std::chrono::steady_clock::now();
     timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
 }
 
@@ -108,13 +111,11 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
 }
 
 u64 CoreTiming::GetCPUTicks() const {
-    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
-    return Core::Timing::nsToCycles(time_now);
+    return clock->GetCPUCycles();
 }
 
 u64 CoreTiming::GetClockTicks() const {
-    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
-    return Core::Timing::nsToClockCycles(time_now);
+    return clock->GetClockCycles();
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -174,15 +175,11 @@ void CoreTiming::Advance() {
 }
 
 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    sys_time_point current = std::chrono::steady_clock::now();
-    auto elapsed = current - start_time;
-    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
+    return clock->GetTimeNS();
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    sys_time_point current = std::chrono::steady_clock::now();
-    auto elapsed = current - start_time;
-    return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
+    return clock->GetTimeUS();
 }
 
 } // namespace Core::Timing
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 1d053a7fa..f04a150ee 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -17,12 +17,12 @@
 #include "common/spin_lock.h"
 #include "common/thread.h"
 #include "common/threadsafe_queue.h"
+#include "common/wall_clock.h"
 
 namespace Core::HostTiming {
 
 /// A callback that may be scheduled for a particular core timing event.
 using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
-using sys_time_point = std::chrono::time_point<std::chrono::steady_clock>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
@@ -112,7 +112,7 @@ private:
     static void ThreadEntry(CoreTiming& instance);
     void Advance();
 
-    sys_time_point start_time;
+    std::unique_ptr<Common::WallClock> clock;
 
     u64 global_timer = 0;
 
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index ca9c8e50a..3d0532d02 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -17,7 +17,7 @@
 // Numbers are chosen randomly to make sure the correct one is given.
 static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
 static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
-static constexpr std::array<u64, 5> calls_order{{2,0,1,4,3}};
+static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
 static std::array<s64, 5> delays{};
 
 static std::bitset<CB_IDS.size()> callbacks_ran_flags;
@@ -52,16 +52,11 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     auto& core_timing = guard.core_timing;
     std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
     events.resize(5);
-    events[0] =
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] =
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] =
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] =
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] =
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
 
     expected_callback = 0;
 
@@ -70,14 +65,15 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     u64 one_micro = 1000U;
     for (std::size_t i = 0; i < events.size(); i++) {
         u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
     }
     /// test pause
     REQUIRE(callbacks_ran_flags.none());
 
     core_timing.Pause(false); // No need to sync
 
-    while (core_timing.HasPendingEvents());
+    while (core_timing.HasPendingEvents())
+        ;
 
     REQUIRE(callbacks_ran_flags.all());
 
@@ -106,16 +102,11 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     auto& core_timing = guard.core_timing;
     std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
     events.resize(5);
-    events[0] =
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] =
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] =
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] =
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] =
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
 
     core_timing.SyncPause(true);
     core_timing.SyncPause(false);
@@ -126,13 +117,14 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     u64 one_micro = 1000U;
     for (std::size_t i = 0; i < events.size(); i++) {
         u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
     }
     u64 end = core_timing.GetGlobalTimeNs().count();
     const double scheduling_time = static_cast<double>(end - start);
     const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
 
-    while (core_timing.HasPendingEvents());
+    while (core_timing.HasPendingEvents())
+        ;
 
     REQUIRE(callbacks_ran_flags.all());
 
@@ -146,5 +138,6 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     const double micro = scheduling_time / 1000.0f;
     const double mili = micro / 1000.0f;
     printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
-    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f);
+    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
+           timer_time / 1000000.f);
 }

From e3524d114246a9221c766bdf1992777b208cbd67 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 11:20:40 -0400
Subject: [PATCH 08/24] Common: Refactor & Document Wall clock.

---
 src/common/uint128.cpp          | 22 +++++++++++++++
 src/common/uint128.h            |  3 +++
 src/common/wall_clock.cpp       | 13 ++++-----
 src/common/wall_clock.h         | 13 ++++++++-
 src/common/x64/native_clock.cpp | 47 +++++----------------------------
 src/core/host_timing.cpp        |  3 +--
 6 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730..7e77588db 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,34 @@
 #include <intrin.h>
 
 #pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
 #endif
 #include <cstring>
 #include "common/uint128.h"
 
 namespace Common {
 
+#ifdef _MSC_VER
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    u128 r{};
+    r[0] = _umul128(a, b, &r[1]);
+    u64 remainder;
+    return _udiv128(r[1], r[0], d, &remainder);
+}
+
+#else
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+}
+
+#endif
+
 u128 Multiply64Into128(u64 a, u64 b) {
     u128 result;
 #ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cb..503cd2d0c 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
 
 namespace Common {
 
+// This function multiplies 2 u64 values and divides it by a u64 value.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+
 // This function multiplies 2 u64 values and produces a u128 value;
 u128 Multiply64Into128(u64 a, u64 b);
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index eabbba9da..8f5e17fa4 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -58,7 +58,7 @@ private:
 
 #ifdef ARCHITECTURE_x86_64
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
     const auto& caps = GetCPUCaps();
     u64 rtsc_frequency = 0;
     if (caps.invariant_tsc) {
@@ -70,19 +70,16 @@ WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_cloc
         }
     }
     if (rtsc_frequency == 0) {
-        return static_cast<WallClock*>(
-            new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency));
+        return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
     } else {
-        return static_cast<WallClock*>(
-            new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency));
+        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency);
     }
 }
 
 #else
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
-    return static_cast<WallClock*>(
-        new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency));
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
 }
 
 #endif
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index 6f763d74b..fc34429bb 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <chrono>
+#include <memory>
 
 #include "common/common_types.h"
 
@@ -12,10 +13,20 @@ namespace Common {
 
 class WallClock {
 public:
+
+    /// Returns current wall time in nanoseconds
     virtual std::chrono::nanoseconds GetTimeNS() = 0;
+
+    /// Returns current wall time in microseconds
     virtual std::chrono::microseconds GetTimeUS() = 0;
+
+    /// Returns current wall time in milliseconds
     virtual std::chrono::milliseconds GetTimeMS() = 0;
+
+    /// Returns current wall time in emulated clock cycles
     virtual u64 GetClockCycles() = 0;
+
+    /// Returns current wall time in emulated cpu cycles
     virtual u64 GetCPUCycles() = 0;
 
     /// Tells if the wall clock, uses the host CPU's hardware clock
@@ -35,6 +46,6 @@ private:
     bool is_native;
 };
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
 
 } // namespace Common
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index c799111fd..26d4d0ba6 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -11,44 +11,11 @@
 #include <x86intrin.h>
 #endif
 
+#include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
 namespace Common {
 
-#ifdef _MSC_VER
-
-namespace {
-
-struct uint128 {
-    u64 low;
-    u64 high;
-};
-
-u64 umuldiv64(u64 a, u64 b, u64 d) {
-    uint128 r{};
-    r.low = _umul128(a, b, &r.high);
-    u64 remainder;
-    return _udiv128(r.high, r.low, d, &remainder);
-}
-
-} // namespace
-
-#else
-
-namespace {
-
-u64 umuldiv64(u64 a, u64 b, u64 d) {
-    const u64 diva = a / d;
-    const u64 moda = a % d;
-    const u64 divb = b / d;
-    const u64 modb = b % d;
-    return diva * b + moda * divb + moda * modb / d;
-}
-
-} // namespace
-
-#endif
-
 u64 EstimateRDTSCFrequency() {
     const auto milli_10 = std::chrono::milliseconds{10};
     // get current time
@@ -70,7 +37,7 @@ u64 EstimateRDTSCFrequency() {
     const u64 timer_diff =
         std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
     const u64 tsc_diff = tscEnd - tscStart;
-    const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff);
+    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
     return tsc_freq;
 }
 
@@ -100,27 +67,27 @@ u64 NativeClock::GetRTSC() {
 
 std::chrono::nanoseconds NativeClock::GetTimeNS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)};
+    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
 }
 
 std::chrono::microseconds NativeClock::GetTimeUS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)};
+    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
 }
 
 std::chrono::milliseconds NativeClock::GetTimeMS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)};
+    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
 }
 
 u64 NativeClock::GetClockCycles() {
     const u64 rtsc_value = GetRTSC();
-    return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+    return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
 }
 
 u64 NativeClock::GetCPUCycles() {
     const u64 rtsc_value = GetRTSC();
-    return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
 }
 
 } // namespace X64
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index ef9977b76..4ccf7c6c1 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -36,8 +36,7 @@ struct CoreTiming::Event {
 };
 
 CoreTiming::CoreTiming() {
-    Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
-    clock = std::unique_ptr<Common::WallClock>(wall);
+    clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
 }
 
 CoreTiming::~CoreTiming() = default;

From 03e4f5dac436fe361834e6b9918983e9c4787acb Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 13:18:23 -0400
Subject: [PATCH 09/24] Common: Correct fcontext fibers.

---
 src/common/fiber.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a88a30ced..e91d86dbe 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,6 +12,7 @@
 
 namespace Common {
 
+
 #ifdef _MSC_VER
 
 struct Fiber::FiberImpl {
@@ -82,7 +83,6 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-
 constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
 
 struct alignas(64) Fiber::FiberImpl {
@@ -108,9 +108,8 @@ void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    auto start_func = std::bind(&Fiber::start, this);
-    impl->context =
-        boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func);
+    impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
+                                                          FiberStartFunc);
 }
 
 Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
@@ -139,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
-    auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
+    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr);
     auto previous_fiber = from->previous_fiber;
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;

From 1bd706344e2381e11245b2f0bdc291429e46c634 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 13:33:13 -0400
Subject: [PATCH 10/24] Common/Tests: Clang Format.

---
 src/common/fiber.cpp        | 21 ++++++++++-----------
 src/common/fiber.h          |  2 +-
 src/common/wall_clock.cpp   | 12 ++++++++----
 src/common/wall_clock.h     |  4 ++--
 src/core/host_timing.cpp    |  8 +++++---
 src/core/host_timing.h      |  2 +-
 src/tests/common/fibers.cpp | 23 ++++++++++++++---------
 7 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e91d86dbe..a46be73c1 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,7 +12,6 @@
 
 namespace Common {
 
-
 #ifdef _MSC_VER
 
 struct Fiber::FiberImpl {
@@ -27,14 +26,14 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
-void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
-{
-   auto fiber = static_cast<Fiber *>(fiber_parameter);
-   fiber->start();
+void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->start();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
+      previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
     impl->handle = CreateFiber(0, &FiberStartFunc, this);
 }
@@ -99,14 +98,14 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
-void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
-{
-   auto fiber = static_cast<Fiber *>(transfer.data);
-   fiber->start(transfer);
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->start(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
+      previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
     impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
                                                           FiberStartFunc);
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 89a01fdd8..b530bf4d2 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -12,7 +12,7 @@
 
 #ifndef _MSC_VER
 namespace boost::context::detail {
-    struct transfer_t;
+struct transfer_t;
 }
 #endif
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 8f5e17fa4..e6161c72c 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -58,7 +58,8 @@ private:
 
 #ifdef ARCHITECTURE_x86_64
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency) {
     const auto& caps = GetCPUCaps();
     u64 rtsc_frequency = 0;
     if (caps.invariant_tsc) {
@@ -70,15 +71,18 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u
         }
     }
     if (rtsc_frequency == 0) {
-        return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
+        return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
+                                                   emulated_clock_frequency);
     } else {
-        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency);
+        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
+                                                  rtsc_frequency);
     }
 }
 
 #else
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency) {
     return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
 }
 
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index fc34429bb..ed284cf50 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -13,7 +13,6 @@ namespace Common {
 
 class WallClock {
 public:
-
     /// Returns current wall time in nanoseconds
     virtual std::chrono::nanoseconds GetTimeNS() = 0;
 
@@ -46,6 +45,7 @@ private:
     bool is_native;
 };
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency);
 
 } // namespace Common
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index 4ccf7c6c1..c734a118e 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -72,7 +72,8 @@ void CoreTiming::SyncPause(bool is_paused) {
     }
     Pause(is_paused);
     event.Set();
-    while (paused_set != is_paused);
+    while (paused_set != is_paused)
+        ;
 }
 
 bool CoreTiming::IsRunning() {
@@ -158,7 +159,8 @@ void CoreTiming::Advance() {
             }
 
             if (!event_queue.empty()) {
-                std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
+                std::chrono::nanoseconds next_time =
+                    std::chrono::nanoseconds(event_queue.front().time - global_timer);
                 basic_lock.unlock();
                 event.WaitFor(next_time);
             } else {
@@ -181,4 +183,4 @@ std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
     return clock->GetTimeUS();
 }
 
-} // namespace Core::Timing
+} // namespace Core::HostTiming
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index f04a150ee..15a150904 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -145,4 +145,4 @@ private:
 ///
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
 
-} // namespace Core::Timing
+} // namespace Core::HostTiming
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index 358393a19..d63194dd4 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -92,7 +92,8 @@ public:
 
     void DoWork1() {
         trap2 = false;
-        while (trap.load());
+        while (trap.load())
+            ;
         for (u32 i = 0; i < 12000; i++) {
             value1 += i;
         }
@@ -105,7 +106,8 @@ public:
     }
 
     void DoWork2() {
-        while (trap2.load());
+        while (trap2.load())
+            ;
         value2 = 2000;
         trap = false;
         Fiber::YieldTo(fiber2, fiber1);
@@ -197,9 +199,12 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
-    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
-    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
-    test_control.fiber3 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
+    test_control.fiber1 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
+    test_control.fiber2 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
+    test_control.fiber3 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
     std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
     std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
     thread1.join();
@@ -291,8 +296,10 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
 TEST_CASE("Fibers::StartRace", "[common]") {
     TestControl3 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
-    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
-    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
+    test_control.fiber1 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
+    test_control.fiber2 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
     std::thread thread1(ThreadStart3, 0, std::ref(test_control));
     std::thread thread2(ThreadStart3, 1, std::ref(test_control));
     thread1.join();
@@ -302,6 +309,4 @@ TEST_CASE("Fibers::StartRace", "[common]") {
     REQUIRE(test_control.value3 == 1);
 }
 
-
-
 } // namespace Common

From 3398f701eeac63f3cfcf193f3e9c1ee2f06edb08 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 14:21:23 -0400
Subject: [PATCH 11/24] Common: Make MinGW build use Windows Fibers instead of
 fcontext_t

---
 src/common/fiber.cpp | 4 ++--
 src/common/fiber.h   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a46be73c1..050c93acb 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -4,7 +4,7 @@
 
 #include "common/assert.h"
 #include "common/fiber.h"
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
 #include <windows.h>
 #else
 #include <boost/context/detail/fcontext.hpp>
@@ -12,7 +12,7 @@
 
 namespace Common {
 
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
diff --git a/src/common/fiber.h b/src/common/fiber.h
index b530bf4d2..598fe7daa 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -10,7 +10,7 @@
 #include "common/common_types.h"
 #include "common/spin_lock.h"
 
-#ifndef _MSC_VER
+#if !defined(_WIN32) && !defined(WIN32)
 namespace boost::context::detail {
 struct transfer_t;
 }
@@ -57,7 +57,7 @@ public:
 private:
     Fiber();
 
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
     void start();
     static void FiberStartFunc(void* fiber_parameter);
 #else

From 1f7dd36499786d373b143a4437d4c32e077a32aa Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 14:45:08 -0400
Subject: [PATCH 12/24] Common/Tests: Address Feedback

---
 src/common/fiber.cpp           |  5 ++---
 src/common/fiber.h             |  8 ++++----
 src/common/spin_lock.cpp       |  3 ++-
 src/core/core_timing_util.cpp  | 14 ++++++++++++--
 src/core/core_timing_util.h    |  2 ++
 src/core/host_timing.cpp       |  4 ++--
 src/core/host_timing.h         |  6 +++---
 src/tests/common/fibers.cpp    | 20 ++++++++++----------
 src/tests/core/host_timing.cpp | 28 ++++++++++++++--------------
 9 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 050c93acb..1220eddf0 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -32,13 +32,12 @@ void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
-      previous_fiber{} {
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
     impl->handle = CreateFiber(0, &FiberStartFunc, this);
 }
 
-Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
 
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 598fe7daa..7e3b130a4 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -67,10 +67,10 @@ private:
 
     struct FiberImpl;
 
-    SpinLock guard;
-    std::function<void(void*)> entry_point;
-    void* start_parameter;
-    std::shared_ptr<Fiber> previous_fiber;
+    SpinLock guard{};
+    std::function<void(void*)> entry_point{};
+    void* start_parameter{};
+    std::shared_ptr<Fiber> previous_fiber{};
     std::unique_ptr<FiberImpl> impl;
     bool is_thread_fiber{};
 };
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
index 82a1d39ff..c7b46aac6 100644
--- a/src/common/spin_lock.cpp
+++ b/src/common/spin_lock.cpp
@@ -35,8 +35,9 @@ void thread_pause() {
 namespace Common {
 
 void SpinLock::lock() {
-    while (lck.test_and_set(std::memory_order_acquire))
+    while (lck.test_and_set(std::memory_order_acquire)) {
         thread_pause();
+    }
 }
 
 void SpinLock::unlock() {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index f42666b4d..be34b26fe 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -49,9 +49,19 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
     return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }
 
+u64 msToClockCycles(std::chrono::milliseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000).first;
+}
+
+u64 usToClockCycles(std::chrono::microseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000).first;
+}
+
 u64 nsToClockCycles(std::chrono::nanoseconds ns) {
-    const u128 temporal = Common::Multiply64Into128(ns.count(), CNTFREQ);
-    return Common::Divide128On32(temporal, 1000000000).first;
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000000).first;
 }
 
 u64 CpuCyclesToClockCycles(u64 ticks) {
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 65fb7368b..b3c58447d 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,6 +13,8 @@ namespace Core::Timing {
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);
+u64 msToClockCycles(std::chrono::milliseconds ns);
+u64 usToClockCycles(std::chrono::microseconds ns);
 u64 nsToClockCycles(std::chrono::nanoseconds ns);
 
 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index c734a118e..be80d9f8e 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -76,11 +76,11 @@ void CoreTiming::SyncPause(bool is_paused) {
         ;
 }
 
-bool CoreTiming::IsRunning() {
+bool CoreTiming::IsRunning() const {
     return !paused_set;
 }
 
-bool CoreTiming::HasPendingEvents() {
+bool CoreTiming::HasPendingEvents() const {
     return !(wait_set && event_queue.empty());
 }
 
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 15a150904..679fcf491 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -72,15 +72,15 @@ public:
     void SyncPause(bool is_paused);
 
     /// Checks if core timing is running.
-    bool IsRunning();
+    bool IsRunning() const;
 
     /// Checks if the timer thread has started.
-    bool HasStarted() {
+    bool HasStarted() const {
         return has_started;
     }
 
     /// Checks if there are any pending time events.
-    bool HasPendingEvents();
+    bool HasPendingEvents() const;
 
     /// Schedules an event in core timing
     void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index d63194dd4..0d3d5153d 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -34,7 +34,7 @@ public:
 };
 
 static void WorkControl1(void* control) {
-    TestControl1* test_control = static_cast<TestControl1*>(control);
+    auto* test_control = static_cast<TestControl1*>(control);
     test_control->DoWork();
 }
 
@@ -70,8 +70,8 @@ static void ThreadStart1(u32 id, TestControl1& test_control) {
 TEST_CASE("Fibers::Setup", "[common]") {
     constexpr u32 num_threads = 7;
     TestControl1 test_control{};
-    test_control.thread_fibers.resize(num_threads, nullptr);
-    test_control.work_fibers.resize(num_threads, nullptr);
+    test_control.thread_fibers.resize(num_threads);
+    test_control.work_fibers.resize(num_threads);
     test_control.items.resize(num_threads, 0);
     test_control.results.resize(num_threads, 0);
     std::vector<std::thread> threads;
@@ -153,17 +153,17 @@ public:
 };
 
 static void WorkControl2_1(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork1();
 }
 
 static void WorkControl2_2(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork2();
 }
 
 static void WorkControl2_3(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork3();
 }
 
@@ -198,7 +198,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
  */
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
-    test_control.thread_fibers.resize(2, nullptr);
+    test_control.thread_fibers.resize(2);
     test_control.fiber1 =
         std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
     test_control.fiber2 =
@@ -261,12 +261,12 @@ public:
 };
 
 static void WorkControl3_1(void* control) {
-    TestControl3* test_control = static_cast<TestControl3*>(control);
+    auto* test_control = static_cast<TestControl3*>(control);
     test_control->DoWork1();
 }
 
 static void WorkControl3_2(void* control) {
-    TestControl3* test_control = static_cast<TestControl3*>(control);
+    auto* test_control = static_cast<TestControl3*>(control);
     test_control->DoWork2();
 }
 
@@ -295,7 +295,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
  */
 TEST_CASE("Fibers::StartRace", "[common]") {
     TestControl3 test_control{};
-    test_control.thread_fibers.resize(2, nullptr);
+    test_control.thread_fibers.resize(2);
     test_control.fiber1 =
         std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
     test_control.fiber2 =
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index 3d0532d02..ed060be55 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -50,13 +50,13 @@ struct ScopeInit final {
 TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
-    events.resize(5);
-    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
     expected_callback = 0;
 
@@ -100,13 +100,13 @@ u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
 TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
-    events.resize(5);
-    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
     core_timing.SyncPause(true);
     core_timing.SyncPause(false);

From 49a7e0984a1210832b8be24433a95711c7ce029b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 15:02:04 -0400
Subject: [PATCH 13/24] Core/HostTiming: Allow events to be advanced manually.

---
 src/common/fiber.cpp      |  2 +-
 src/common/wall_clock.cpp |  9 +++---
 src/core/host_timing.cpp  | 61 +++++++++++++++++++++++----------------
 src/core/host_timing.h    |  6 +++-
 4 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 1220eddf0..e9c0946b6 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -110,7 +110,7 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
                                                           FiberStartFunc);
 }
 
-Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index e6161c72c..d4d35f4e7 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -42,14 +42,15 @@ public:
 
     u64 GetClockCycles() override {
         std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
-        return Common::Divide128On32(temporal, 1000000000).first;
+        const u128 temporary =
+            Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
+        return Common::Divide128On32(temporary, 1000000000).first;
     }
 
     u64 GetCPUCycles() override {
         std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
-        return Common::Divide128On32(temporal, 1000000000).first;
+        const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
+        return Common::Divide128On32(temporary, 1000000000).first;
     }
 
 private:
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index be80d9f8e..5d35a96b1 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -42,7 +42,7 @@ CoreTiming::CoreTiming() {
 CoreTiming::~CoreTiming() = default;
 
 void CoreTiming::ThreadEntry(CoreTiming& instance) {
-    instance.Advance();
+    instance.ThreadLoop();
 }
 
 void CoreTiming::Initialize() {
@@ -137,38 +137,49 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
     basic_lock.unlock();
 }
 
-void CoreTiming::Advance() {
+std::optional<u64> CoreTiming::Advance() {
+    advance_lock.lock();
+    basic_lock.lock();
+    global_timer = GetGlobalTimeNs().count();
+
+    while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+        Event evt = std::move(event_queue.front());
+        std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+        event_queue.pop_back();
+        basic_lock.unlock();
+
+        if (auto event_type{evt.type.lock()}) {
+            event_type->callback(evt.userdata, global_timer - evt.time);
+        }
+
+        basic_lock.lock();
+    }
+
+    if (!event_queue.empty()) {
+        const u64 next_time = event_queue.front().time - global_timer;
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return next_time;
+    } else {
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return std::nullopt;
+    }
+}
+
+void CoreTiming::ThreadLoop() {
     has_started = true;
     while (!shutting_down) {
         while (!paused) {
             paused_set = false;
-            basic_lock.lock();
-            global_timer = GetGlobalTimeNs().count();
-
-            while (!event_queue.empty() && event_queue.front().time <= global_timer) {
-                Event evt = std::move(event_queue.front());
-                std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-                event_queue.pop_back();
-                basic_lock.unlock();
-
-                if (auto event_type{evt.type.lock()}) {
-                    event_type->callback(evt.userdata, global_timer - evt.time);
-                }
-
-                basic_lock.lock();
-            }
-
-            if (!event_queue.empty()) {
-                std::chrono::nanoseconds next_time =
-                    std::chrono::nanoseconds(event_queue.front().time - global_timer);
-                basic_lock.unlock();
-                event.WaitFor(next_time);
+            const auto next_time = Advance();
+            if (next_time) {
+                std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                event.WaitFor(next_time_ns);
             } else {
-                basic_lock.unlock();
                 wait_set = true;
                 event.Wait();
             }
-
             wait_set = false;
         }
         paused_set = true;
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 679fcf491..cd44b308c 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -103,6 +103,9 @@ public:
     /// Returns current time in nanoseconds.
     std::chrono::nanoseconds GetGlobalTimeNs() const;
 
+    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
+    std::optional<u64> Advance();
+
 private:
     struct Event;
 
@@ -110,7 +113,7 @@ private:
     void ClearPendingEvents();
 
     static void ThreadEntry(CoreTiming& instance);
-    void Advance();
+    void ThreadLoop();
 
     std::unique_ptr<Common::WallClock> clock;
 
@@ -128,6 +131,7 @@ private:
     std::shared_ptr<EventType> ev_lost;
     Common::Event event{};
     Common::SpinLock basic_lock{};
+    Common::SpinLock advance_lock{};
     std::unique_ptr<std::thread> timer_thread;
     std::atomic<bool> paused{};
     std::atomic<bool> paused_set{};

From 96b2d8419c94f9bcb5f2f970bbb453aa7383b510 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 15 Feb 2020 13:56:50 -0400
Subject: [PATCH 14/24] HostTiming: Correct rebase and implement AddTicks.

---
 src/core/host_timing.cpp | 11 ++++++++++-
 src/core/host_timing.h   |  9 +++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index 5d35a96b1..2f40de1a1 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -36,7 +36,8 @@ struct CoreTiming::Event {
 };
 
 CoreTiming::CoreTiming() {
-    clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
+    clock =
+        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
 }
 
 CoreTiming::~CoreTiming() = default;
@@ -110,6 +111,14 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
     basic_lock.unlock();
 }
 
+void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
+    ticks_count[core_index] += ticks;
+}
+
+void CoreTiming::ResetTicks(std::size_t core_index) {
+    ticks_count[core_index] = 0;
+}
+
 u64 CoreTiming::GetCPUTicks() const {
     return clock->GetCPUCycles();
 }
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index cd44b308c..5ad8c5f35 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <chrono>
 #include <functional>
 #include <memory>
@@ -18,6 +19,7 @@
 #include "common/thread.h"
 #include "common/threadsafe_queue.h"
 #include "common/wall_clock.h"
+#include "core/hardware_properties.h"
 
 namespace Core::HostTiming {
 
@@ -91,6 +93,11 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
+
+    void AddTicks(std::size_t core_index, u64 ticks);
+
+    void ResetTicks(std::size_t core_index);
+
     /// Returns current time in emulated CPU cycles
     u64 GetCPUTicks() const;
 
@@ -138,6 +145,8 @@ private:
     std::atomic<bool> wait_set{};
     std::atomic<bool> shutting_down{};
     std::atomic<bool> has_started{};
+
+    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
 };
 
 /// Creates a core timing event with the given name and callback.

From 8f6ffcd5c450b06f641580c410e5a4756a897fcc Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 22 Feb 2020 12:56:33 -0400
Subject: [PATCH 15/24] Host Timing: Correct clang format.

---
 src/core/host_timing.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 5ad8c5f35..be6b68d7c 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -93,7 +93,6 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
-
     void AddTicks(std::size_t core_index, u64 ticks);
 
     void ResetTicks(std::size_t core_index);

From 7d2b1a6ec4a1c0daea0bac83a83c85f263609224 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 14:39:27 -0400
Subject: [PATCH 16/24] Common/Fiber: Correct f_context based Fibers.

---
 src/common/fiber.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e9c0946b6..3ef820c62 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -81,10 +81,10 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
+constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB
 
-struct alignas(64) Fiber::FiberImpl {
-    std::array<u8, default_stack_size> stack;
+struct Fiber::FiberImpl {
+    alignas(64) std::array<u8, default_stack_size> stack;
     boost::context::detail::fcontext_t context;
 };
 
@@ -106,8 +106,10 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
-                                                          FiberStartFunc);
+    void* stack_start =
+        static_cast<void*>(static_cast<std::uintptr_t>(impl->stack.data()) + default_stack_size);
+    impl->context =
+        boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc);
 }
 
 Fiber::Fiber() {
@@ -136,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
-    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr);
+    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
     auto previous_fiber = from->previous_fiber;
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;

From 41013381d69f952f78b85de3ce226c1499d889b6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 17:34:23 -0400
Subject: [PATCH 17/24] Common/Fiber: Additional corrections to f_context.

---
 src/common/fiber.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 3ef820c62..e4ecc73df 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -81,7 +81,7 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB
+constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
@@ -106,10 +106,10 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    void* stack_start =
-        static_cast<void*>(static_cast<std::uintptr_t>(impl->stack.data()) + default_stack_size);
+    u8* stack_limit = impl->stack.data();
+    u8* stack_base = stack_limit + default_stack_size;
     impl->context =
-        boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc);
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }
 
 Fiber::Fiber() {

From 137d862d9b275209b3d62a413396a15e9e14b4b4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 16:32:47 -0400
Subject: [PATCH 18/24] Common/Fiber: Implement Rewinding.

---
 src/common/fiber.cpp        | 32 ++++++++++++++++++++++++--
 src/common/fiber.h          |  8 +++++++
 src/tests/common/fibers.cpp | 46 +++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e4ecc73df..f61479e13 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,10 +12,13 @@
 
 namespace Common {
 
+constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
+
 #if defined(_WIN32) || defined(WIN32)
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
+    LPVOID rewind_handle = nullptr;
 };
 
 void Fiber::start() {
@@ -26,15 +29,29 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
+void Fiber::onRewind() {
+    ASSERT(impl->handle != nullptr);
+    DeleteFiber(impl->handle);
+    impl->handle = impl->rewind_handle;
+    impl->rewind_handle = nullptr;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
 void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
     fiber->start();
 }
 
+void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->onRewind();
+}
+
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
-    impl->handle = CreateFiber(0, &FiberStartFunc, this);
+    impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
 }
 
 Fiber::Fiber() {
@@ -60,6 +77,18 @@ void Fiber::Exit() {
     guard.unlock();
 }
 
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
+void Fiber::Rewind() {
+    ASSERT(rewind_point);
+    ASSERT(impl->rewind_handle == nullptr);
+    impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
+    SwitchToFiber(impl->rewind_handle);
+}
+
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
@@ -81,7 +110,6 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 7e3b130a4..a710df257 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -46,6 +46,10 @@ public:
     static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
     static std::shared_ptr<Fiber> ThreadToFiber();
 
+    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+
+    void Rewind();
+
     /// Only call from main thread's fiber
     void Exit();
 
@@ -58,8 +62,10 @@ private:
     Fiber();
 
 #if defined(_WIN32) || defined(WIN32)
+    void onRewind();
     void start();
     static void FiberStartFunc(void* fiber_parameter);
+    static void RewindStartFunc(void* fiber_parameter);
 #else
     void start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
@@ -69,6 +75,8 @@ private:
 
     SpinLock guard{};
     std::function<void(void*)> entry_point{};
+    std::function<void(void*)> rewind_point{};
+    void* rewind_parameter{};
     void* start_parameter{};
     std::shared_ptr<Fiber> previous_fiber{};
     std::unique_ptr<FiberImpl> impl;
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index 0d3d5153d..12536b6d8 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -309,4 +309,50 @@ TEST_CASE("Fibers::StartRace", "[common]") {
     REQUIRE(test_control.value3 == 1);
 }
 
+class TestControl4;
+
+static void WorkControl4(void* control);
+
+class TestControl4 {
+public:
+    TestControl4() {
+        fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this);
+        goal_reached = false;
+        rewinded = false;
+    }
+
+    void Execute() {
+        thread_fiber = Fiber::ThreadToFiber();
+        Fiber::YieldTo(thread_fiber, fiber1);
+        thread_fiber->Exit();
+    }
+
+    void DoWork() {
+        fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this);
+        if (rewinded) {
+            goal_reached = true;
+            Fiber::YieldTo(fiber1, thread_fiber);
+        }
+        rewinded = true;
+        fiber1->Rewind();
+    }
+
+    std::shared_ptr<Common::Fiber> fiber1;
+    std::shared_ptr<Common::Fiber> thread_fiber;
+    bool goal_reached;
+    bool rewinded;
+};
+
+static void WorkControl4(void* control) {
+    auto* test_control = static_cast<TestControl4*>(control);
+    test_control->DoWork();
+}
+
+TEST_CASE("Fibers::Rewind", "[common]") {
+    TestControl4 test_control{};
+    test_control.Execute();
+    REQUIRE(test_control.goal_reached);
+    REQUIRE(test_control.rewinded);
+}
+
 } // namespace Common

From 18f54f74862322d5a9360cbdc3541b6e3f15dce6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 11:24:08 -0400
Subject: [PATCH 19/24] Common/Fiber: Document fiber interexchange.

---
 src/common/fiber.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/common/fiber.h b/src/common/fiber.h
index a710df257..3bbd506b5 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -28,7 +28,10 @@ namespace Common {
  * to implement such patterns. This fiber class is 'threadsafe' only one fiber
  * can be running at a time and threads will be locked while trying to yield to
  * a running fiber until it yields. WARNING exchanging two running fibers between
- * threads will cause a deadlock.
+ * threads will cause a deadlock. To prevent a deadlock, each thread should have
+ * an intermediary fiber: switch to the current thread's intermediary fiber first,
+ * and from it switch to the target fiber. This way two fibers can be exchanged
+ * between two different threads.
  */
 class Fiber {
 public:

From 59ce6e6d06e5ce8628f96bb247a342dec356fe43 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 12 Mar 2020 20:10:51 -0400
Subject: [PATCH 20/24] Common/uint128: Correct MSVC Compilation in old
 versions.

---
 src/common/uint128.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 7e77588db..16bf7c828 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -19,7 +19,11 @@ u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
     u128 r{};
     r[0] = _umul128(a, b, &r[1]);
     u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], d, &remainder);
+#else
     return _udiv128(r[1], r[0], d, &remainder);
+#endif
 }
 
 #else

From b6655aa2e492e326e319b09e832c1612bf27acf4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 1 Apr 2020 09:19:10 -0400
Subject: [PATCH 21/24] Common/Fiber: Implement Rewind on Boost Context.

---
 src/common/fiber.cpp | 39 +++++++++++++++++++++++++++++++++++++--
 src/common/fiber.h   |  2 ++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index f61479e13..6ea314d75 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -113,7 +113,11 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
+    u8* stack_limit;
+    alignas(64) std::array<u8, default_stack_size> rewind_stack;
+    u8* rewind_stack_limit;
     boost::context::detail::fcontext_t context;
+    boost::context::detail::fcontext_t rewind_context;
 };
 
 void Fiber::start(boost::context::detail::transfer_t& transfer) {
@@ -125,21 +129,43 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
+void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) {
+    ASSERT(impl->context != nullptr);
+    impl->context = impl->rewind_context;
+    impl->rewind_context = nullptr;
+    u8* tmp = impl->stack_limit;
+    impl->stack_limit = impl->rewind_stack_limit;
+    impl->rewind_stack_limit = tmp;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
     fiber->start(transfer);
 }
 
+void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->onRewind(transfer);
+}
+
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    u8* stack_limit = impl->stack.data();
-    u8* stack_base = stack_limit + default_stack_size;
+    impl->stack_limit = impl->stack.data();
+    impl->rewind_stack_limit = impl->rewind_stack.data();
+    u8* stack_base = impl->stack_limit + default_stack_size;
     impl->context =
         boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }
 
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
 Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
@@ -161,6 +187,15 @@ void Fiber::Exit() {
     guard.unlock();
 }
 
+void Fiber::Rewind() {
+    ASSERT(rewind_point);
+    ASSERT(impl->rewind_context == nullptr);
+    u8* stack_base = impl->rewind_stack_limit + default_stack_size;
+    impl->rewind_context =
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
+    boost::context::detail::jump_fcontext(impl->rewind_context, this);
+}
+
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 3bbd506b5..cab7bc4b5 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -70,8 +70,10 @@ private:
     static void FiberStartFunc(void* fiber_parameter);
     static void RewindStartFunc(void* fiber_parameter);
 #else
+    void onRewind(boost::context::detail::transfer_t& transfer);
     void start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+    static void RewindStartFunc(boost::context::detail::transfer_t transfer);
 #endif
 
     struct FiberImpl;

From e77ee67bfacf9a0d3b9e7cd164531a2be158adc9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 13 May 2020 13:49:36 -0400
Subject: [PATCH 22/24] Common/Fiber: Address Feedback and Correct Memory
 leaks.

---
 src/common/fiber.cpp | 58 ++++++++++++++++++++++++--------------------
 src/common/fiber.h   | 17 +++++++------
 2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 6ea314d75..f97ad433b 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -21,7 +21,7 @@ struct Fiber::FiberImpl {
     LPVOID rewind_handle = nullptr;
 };
 
-void Fiber::start() {
+void Fiber::Start() {
     ASSERT(previous_fiber != nullptr);
     previous_fiber->guard.unlock();
     previous_fiber.reset();
@@ -29,7 +29,7 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
-void Fiber::onRewind() {
+void Fiber::OnRewind() {
     ASSERT(impl->handle != nullptr);
     DeleteFiber(impl->handle);
     impl->handle = impl->rewind_handle;
@@ -38,14 +38,14 @@ void Fiber::onRewind() {
     UNREACHABLE();
 }
 
-void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
+void Fiber::FiberStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->start();
+    fiber->Start();
 }
 
-void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) {
+void Fiber::RewindStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->onRewind();
+    fiber->OnRewind();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -59,8 +59,11 @@ Fiber::Fiber() {
 }
 
 Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
     // Make sure the Fiber is not being used
-    bool locked = guard.try_lock();
+    const bool locked = guard.try_lock();
     ASSERT_MSG(locked, "Destroying a fiber that's still running");
     if (locked) {
         guard.unlock();
@@ -75,6 +78,7 @@ void Fiber::Exit() {
     }
     ConvertFiberToThread();
     guard.unlock();
+    released = true;
 }
 
 void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
@@ -89,22 +93,21 @@ void Fiber::Rewind() {
     SwitchToFiber(impl->rewind_handle);
 }
 
-void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     SwitchToFiber(to->impl->handle);
-    auto previous_fiber = from->previous_fiber;
-    ASSERT(previous_fiber != nullptr);
-    previous_fiber->guard.unlock();
-    previous_fiber.reset();
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
     std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
     fiber->guard.lock();
-    fiber->impl->handle = ConvertThreadToFiber(NULL);
+    fiber->impl->handle = ConvertThreadToFiber(nullptr);
     fiber->is_thread_fiber = true;
     return fiber;
 }
@@ -120,7 +123,7 @@ struct Fiber::FiberImpl {
     boost::context::detail::fcontext_t rewind_context;
 };
 
-void Fiber::start(boost::context::detail::transfer_t& transfer) {
+void Fiber::Start(boost::context::detail::transfer_t& transfer) {
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;
     previous_fiber->guard.unlock();
@@ -129,7 +132,7 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
-void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) {
+void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
     ASSERT(impl->context != nullptr);
     impl->context = impl->rewind_context;
     impl->rewind_context = nullptr;
@@ -142,17 +145,16 @@ void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transf
 
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
-    fiber->start(transfer);
+    fiber->Start(transfer);
 }
 
 void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
-    fiber->onRewind(transfer);
+    fiber->OnRewind(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
-      previous_fiber{} {
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
     impl->stack_limit = impl->stack.data();
     impl->rewind_stack_limit = impl->rewind_stack.data();
@@ -171,8 +173,11 @@ Fiber::Fiber() {
 }
 
 Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
     // Make sure the Fiber is not being used
-    bool locked = guard.try_lock();
+    const bool locked = guard.try_lock();
     ASSERT_MSG(locked, "Destroying a fiber that's still running");
     if (locked) {
         guard.unlock();
@@ -180,11 +185,13 @@ Fiber::~Fiber() {
 }
 
 void Fiber::Exit() {
+
     ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
     guard.unlock();
+    released = true;
 }
 
 void Fiber::Rewind() {
@@ -196,17 +203,16 @@ void Fiber::Rewind() {
     boost::context::detail::jump_fcontext(impl->rewind_context, this);
 }
 
-void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
-    auto previous_fiber = from->previous_fiber;
-    ASSERT(previous_fiber != nullptr);
-    previous_fiber->impl->context = transfer.fctx;
-    previous_fiber->guard.unlock();
-    previous_fiber.reset();
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->impl->context = transfer.fctx;
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
diff --git a/src/common/fiber.h b/src/common/fiber.h
index cab7bc4b5..dafc1100e 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -46,7 +46,7 @@ public:
 
     /// Yields control from Fiber 'from' to Fiber 'to'
     /// Fiber 'from' must be the currently running fiber.
-    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
+    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
     static std::shared_ptr<Fiber> ThreadToFiber();
 
     void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
@@ -65,13 +65,13 @@ private:
     Fiber();
 
 #if defined(_WIN32) || defined(WIN32)
-    void onRewind();
-    void start();
+    void OnRewind();
+    void Start();
     static void FiberStartFunc(void* fiber_parameter);
     static void RewindStartFunc(void* fiber_parameter);
 #else
-    void onRewind(boost::context::detail::transfer_t& transfer);
-    void start(boost::context::detail::transfer_t& transfer);
+    void OnRewind(boost::context::detail::transfer_t& transfer);
+    void Start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
     static void RewindStartFunc(boost::context::detail::transfer_t transfer);
 #endif
@@ -79,13 +79,14 @@ private:
     struct FiberImpl;
 
     SpinLock guard{};
-    std::function<void(void*)> entry_point{};
-    std::function<void(void*)> rewind_point{};
+    std::function<void(void*)> entry_point;
+    std::function<void(void*)> rewind_point;
     void* rewind_parameter{};
     void* start_parameter{};
-    std::shared_ptr<Fiber> previous_fiber{};
+    std::shared_ptr<Fiber> previous_fiber;
     std::unique_ptr<FiberImpl> impl;
     bool is_thread_fiber{};
+    bool released{};
 };
 
 } // namespace Common

From 45d29436b32bbee1bdf1344e3dc3db365dc42937 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 14 May 2020 14:10:49 -0400
Subject: [PATCH 23/24] Tests/HostTiming: Correct GCC Compile error.

---
 src/tests/core/host_timing.cpp | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index ed060be55..556254098 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -22,7 +22,6 @@ static std::array<s64, 5> delays{};
 
 static std::bitset<CB_IDS.size()> callbacks_ran_flags;
 static u64 expected_callback = 0;
-static s64 lateness = 0;
 
 template <unsigned int IDX>
 void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
@@ -34,8 +33,6 @@ void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
     ++expected_callback;
 }
 
-static u64 callbacks_done = 0;
-
 struct ScopeInit final {
     ScopeInit() {
         core_timing.Initialize();
@@ -47,6 +44,20 @@ struct ScopeInit final {
     Core::HostTiming::CoreTiming core_timing;
 };
 
+#pragma optimize("", off)
+
+static u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 placebo = 0;
+    for (std::size_t i = 0; i < 1000; i++) {
+        placebo += core_timing.GetGlobalTimeNs().count();
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    return (end - start);
+}
+
+#pragma optimize("", on)
+
 TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
@@ -85,18 +96,6 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     }
 }
 
-#pragma optimize("", off)
-u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
-    u64 start = core_timing.GetGlobalTimeNs().count();
-    u64 placebo = 0;
-    for (std::size_t i = 0; i < 1000; i++) {
-        placebo += core_timing.GetGlobalTimeNs().count();
-    }
-    u64 end = core_timing.GetGlobalTimeNs().count();
-    return (end - start);
-}
-#pragma optimize("", on)
-
 TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;

From 7b893c7963a57bf41f5dad7dd1709985971ce291 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Jun 2020 19:46:43 -0400
Subject: [PATCH 24/24] Common: Fix non-conan build

---
 src/common/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index aacea0ab7..3cc17d0e9 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -187,6 +187,7 @@ if(ARCHITECTURE_x86_64)
 endif()
 
 create_target_directory_groups(common)
+find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
 
-target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
+target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
 target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)