Threading: Reworked the way our scheduler works.

Threads will now be awakened when the objects they're waiting on are signaled, instead of repeating the WaitSynchronization call every now and then.

The scheduler is now called once after every SVC call, and once after a thread is awakened from sleep by its timeout callback.

This new implementation is based off reverse-engineering of the real kernel.

See https://gist.github.com/Subv/02f29bd9f1e5deb7aceea1e8f019c8f4 for a more detailed description of how the real kernel handles rescheduling.
This commit is contained in:
Subv 2016-12-03 22:38:14 -05:00
parent 0423a38ab5
commit 8634b8cb83
8 changed files with 189 additions and 199 deletions

View file

@ -230,7 +230,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
list.push_back(std::make_unique<WaitTreeMutexList>(thread.held_mutexes));
}
if (thread.status == THREADSTATUS_WAIT_SYNCH) {
list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects, thread.wait_all));
list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects, !thread.wait_objects.empty()));
}
return list;

View file

@ -79,8 +79,6 @@ ResultCode AddressArbiter::ArbitrateAddress(ArbitrationType type, VAddr address,
ErrorSummary::WrongArgument, ErrorLevel::Usage);
}
HLE::Reschedule(__func__);
// The calls that use a timeout seem to always return a Timeout error even if they did not put
// the thread to sleep
if (type == ArbitrationType::WaitIfLessThanWithTimeout ||

View file

@ -31,13 +31,62 @@ void WaitObject::RemoveWaitingThread(Thread* thread) {
waiting_threads.erase(itr);
}
SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() {
// Remove the threads that are ready or already running from our waitlist
waiting_threads.erase(std::remove_if(waiting_threads.begin(), waiting_threads.end(), [](SharedPtr<Thread> thread) -> bool {
return thread->status == THREADSTATUS_RUNNING || thread->status == THREADSTATUS_READY;
}), waiting_threads.end());
if (waiting_threads.empty())
return nullptr;
auto candidate_threads = waiting_threads;
// Eliminate all threads that are waiting on more than one object, and not all of them are ready
candidate_threads.erase(std::remove_if(candidate_threads.begin(), candidate_threads.end(), [](SharedPtr<Thread> thread) -> bool {
for (auto object : thread->wait_objects)
if (object->ShouldWait())
return true;
return false;
}), candidate_threads.end());
// Return the thread with the lowest priority value (The one with the highest priority)
auto thread_itr = std::min_element(candidate_threads.begin(), candidate_threads.end(), [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
return lhs->current_priority < rhs->current_priority;
});
if (thread_itr == candidate_threads.end())
return nullptr;
return *thread_itr;
}
void WaitObject::WakeupAllWaitingThreads() {
for (auto thread : waiting_threads)
// Wake up all threads that can be awoken, in priority order
while (auto thread = GetHighestPriorityReadyThread()) {
if (thread->wait_objects.empty()) {
Acquire();
// Set the output index of the WaitSynchronizationN call to the index of this object.
if (thread->wait_set_output) {
thread->SetWaitSynchronizationOutput(thread->GetWaitObjectIndex(this));
thread->wait_set_output = false;
}
} else {
for (auto object : thread->wait_objects) {
object->Acquire();
// Remove the thread from the object's waitlist
object->RemoveWaitingThread(thread.get());
}
// Note: This case doesn't update the output index of WaitSynchronizationN.
// Clear the thread's waitlist
thread->wait_objects.clear();
}
// Set the result of the call to WaitSynchronization to RESULT_SUCCESS
thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
thread->ResumeFromWait();
waiting_threads.clear();
HLE::Reschedule(__func__);
// Note: Removing the thread from the object's waitlist will be done by GetHighestPriorityReadyThread
}
}
const std::vector<SharedPtr<Thread>>& WaitObject::GetWaitingThreads() const {

View file

@ -155,6 +155,9 @@ public:
/// Wake up all threads waiting on this object
void WakeupAllWaitingThreads();
/// Obtains the highest priority thread that is ready to run from this object's waiting list.
SharedPtr<Thread> GetHighestPriorityReadyThread();
/// Get a const reference to the waiting threads list for debug use
const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const;

View file

@ -120,8 +120,6 @@ void Thread::Stop() {
u32 tls_slot =
((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
HLE::Reschedule(__func__);
}
Thread* ArbitrateHighestPriorityThread(u32 address) {
@ -180,50 +178,6 @@ static void PriorityBoostStarvedThreads() {
}
}
/**
* Gets the registers for timeout parameter of the next WaitSynchronization call.
* @param thread a pointer to the thread that is ready to call WaitSynchronization
* @returns a tuple of two register pointers to low and high part of the timeout parameter
*/
static std::tuple<u32*, u32*> GetWaitSynchTimeoutParameterRegister(Thread* thread) {
bool thumb_mode = (thread->context.cpsr & TBIT) != 0;
u16 thumb_inst = Memory::Read16(thread->context.pc & 0xFFFFFFFE);
u32 inst = Memory::Read32(thread->context.pc & 0xFFFFFFFC) & 0x0FFFFFFF;
if ((thumb_mode && thumb_inst == 0xDF24) || (!thumb_mode && inst == 0x0F000024)) {
// svc #0x24 (WaitSynchronization1)
return std::make_tuple(&thread->context.cpu_registers[2],
&thread->context.cpu_registers[3]);
} else if ((thumb_mode && thumb_inst == 0xDF25) || (!thumb_mode && inst == 0x0F000025)) {
// svc #0x25 (WaitSynchronizationN)
return std::make_tuple(&thread->context.cpu_registers[0],
&thread->context.cpu_registers[4]);
}
UNREACHABLE();
}
/**
* Updates the WaitSynchronization timeout parameter according to the difference
* between ticks of the last WaitSynchronization call and the incoming one.
* @param timeout_low a pointer to the register for the low part of the timeout parameter
* @param timeout_high a pointer to the register for the high part of the timeout parameter
* @param last_tick tick of the last WaitSynchronization call
*/
static void UpdateTimeoutParameter(u32* timeout_low, u32* timeout_high, u64 last_tick) {
s64 timeout = ((s64)*timeout_high << 32) | *timeout_low;
if (timeout != -1) {
timeout -= cyclesToUs(CoreTiming::GetTicks() - last_tick) * 1000; // in nanoseconds
if (timeout < 0)
timeout = 0;
*timeout_low = timeout & 0xFFFFFFFF;
*timeout_high = timeout >> 32;
}
}
/**
* Switches the CPU's active thread context to that of the specified thread
* @param new_thread The thread to switch to
@ -254,32 +208,6 @@ static void SwitchContext(Thread* new_thread) {
current_thread = new_thread;
// If the thread was waited by a svcWaitSynch call, step back PC by one instruction to rerun
// the SVC when the thread wakes up. This is necessary to ensure that the thread can acquire
// the requested wait object(s) before continuing.
if (new_thread->waitsynch_waited) {
// CPSR flag indicates CPU mode
bool thumb_mode = (new_thread->context.cpsr & TBIT) != 0;
// SVC instruction is 2 bytes for THUMB, 4 bytes for ARM
new_thread->context.pc -= thumb_mode ? 2 : 4;
// Get the register for timeout parameter
u32 *timeout_low, *timeout_high;
std::tie(timeout_low, timeout_high) = GetWaitSynchTimeoutParameterRegister(new_thread);
// Update the timeout parameter
UpdateTimeoutParameter(timeout_low, timeout_high, new_thread->last_running_ticks);
}
// Clean up the thread's wait_objects, they'll be restored if needed during
// the svcWaitSynchronization call
for (size_t i = 0; i < new_thread->wait_objects.size(); ++i) {
SharedPtr<WaitObject> object = new_thread->wait_objects[i];
object->RemoveWaitingThread(new_thread);
}
new_thread->wait_objects.clear();
ready_queue.remove(new_thread->current_priority, new_thread);
new_thread->status = THREADSTATUS_RUNNING;
@ -319,17 +247,13 @@ static Thread* PopNextReadyThread() {
void WaitCurrentThread_Sleep() {
Thread* thread = GetCurrentThread();
thread->status = THREADSTATUS_WAIT_SLEEP;
HLE::Reschedule(__func__);
}
void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects,
bool wait_set_output, bool wait_all) {
bool wait_set_output) {
Thread* thread = GetCurrentThread();
thread->wait_set_output = wait_set_output;
thread->wait_all = wait_all;
thread->wait_objects = std::move(wait_objects);
thread->waitsynch_waited = true;
thread->status = THREADSTATUS_WAIT_SYNCH;
}
@ -351,15 +275,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
return;
}
thread->waitsynch_waited = false;
if (thread->status == THREADSTATUS_WAIT_SYNCH || thread->status == THREADSTATUS_WAIT_ARB) {
thread->wait_set_output = false;
thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info));
if (thread->wait_set_output)
thread->SetWaitSynchronizationOutput(-1);
}
thread->ResumeFromWait();
@ -399,6 +319,7 @@ void Thread::ResumeFromWait() {
ready_queue.push_back(current_priority, this);
status = THREADSTATUS_READY;
HLE::Reschedule(__func__);
}
/**
@ -494,13 +415,11 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
thread->last_running_ticks = CoreTiming::GetTicks();
thread->processor_id = processor_id;
thread->wait_set_output = false;
thread->wait_all = false;
thread->wait_objects.clear();
thread->wait_address = 0;
thread->name = std::move(name);
thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
thread->owner_process = g_current_process;
thread->waitsynch_waited = false;
// Find the next available TLS index, and mark it as used
auto& tls_slots = Kernel::g_current_process->tls_slots;
@ -555,8 +474,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
ready_queue.push_back(thread->current_priority, thread.get());
thread->status = THREADSTATUS_READY;
HLE::Reschedule(__func__);
return MakeResult<SharedPtr<Thread>>(std::move(thread));
}
@ -619,14 +536,6 @@ void Reschedule() {
HLE::DoneRescheduling();
// Don't bother switching to the same thread.
// But if the thread was waiting on objects, we still need to switch it
// to perform PC modification, change state to RUNNING, etc.
// This occurs in the case when an object the thread is waiting on immediately wakes up
// the current thread before Reschedule() is called.
if (next == cur && (next == nullptr || next->waitsynch_waited == false))
return;
if (cur && next) {
LOG_TRACE(Kernel, "context switch %u -> %u", cur->GetObjectId(), next->GetObjectId());
} else if (cur) {

View file

@ -5,6 +5,7 @@
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include <boost/container/flat_set.hpp>
#include "common/common_types.h"
@ -124,6 +125,16 @@ public:
*/
void SetWaitSynchronizationOutput(s32 output);
/**
* Retrieves the index that this particular object occupies in the list of objects
* that the thread passed to WaitSynchronizationN.
* It is used to set the output value of WaitSynchronizationN when the thread is awakened.
* @param object Object to query the index of.
*/
s32 GetWaitObjectIndex(WaitObject* object) {
return wait_objects_index[object->GetObjectId()];
}
/**
* Stops a thread, invalidating it from further use
*/
@ -154,16 +165,16 @@ public:
VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
/// Mutexes currently held by this thread, which will be released when it exits.
boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;
SharedPtr<Process> owner_process; ///< Process that owns this thread
std::vector<SharedPtr<WaitObject>> wait_objects; ///< Objects that the thread is waiting on
std::unordered_map<int, s32> wait_objects_index; ///< Mapping of Object ids to their position in the last waitlist that this object waited on.
VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address
bool wait_all; ///< True if the thread is waiting on all objects before resuming
bool wait_set_output; ///< True if the output parameter should be set on thread wakeup
bool wait_set_output; ///< True if the WaitSynchronizationN output parameter should be set on thread wakeup
std::string name;
@ -215,10 +226,9 @@ void WaitCurrentThread_Sleep();
* @param wait_objects Kernel objects that we are waiting on
* @param wait_set_output If true, set the output parameter on thread wakeup (for
* WaitSynchronizationN only)
* @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only)
*/
void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects,
bool wait_set_output, bool wait_all);
bool wait_set_output);
/**
* Waits the current thread from an ArbitrateAddress call

View file

@ -60,14 +60,10 @@ void Timer::Set(s64 initial, s64 interval) {
u64 initial_microseconds = initial / 1000;
CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type,
callback_handle);
HLE::Reschedule(__func__);
}
void Timer::Cancel() {
CoreTiming::UnscheduleEvent(timer_callback_event_type, callback_handle);
HLE::Reschedule(__func__);
}
void Timer::Clear() {

View file

@ -249,27 +249,30 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) {
auto object = Kernel::g_handle_table.GetWaitObject(handle);
Kernel::Thread* thread = Kernel::GetCurrentThread();
thread->waitsynch_waited = false;
if (object == nullptr)
return ERR_INVALID_HANDLE;
LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle,
object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds);
HLE::Reschedule(__func__);
// Check for next thread to schedule
if (object->ShouldWait()) {
if (nano_seconds == 0)
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
object->AddWaitingThread(thread);
Kernel::WaitCurrentThread_WaitSynchronization({object}, false, false);
thread->status = THREADSTATUS_WAIT_SYNCH;
// Create an event to wake the thread up after the specified nanosecond delay has passed
thread->WakeAfterDelay(nano_seconds);
// NOTE: output of this SVC will be set later depending on how the thread resumes
return HLE::RESULT_INVALID;
// Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects.
// Otherwise we retain the default value of timeout.
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
}
object->Acquire();
@ -283,8 +286,6 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou
bool wait_thread = !wait_all;
int handle_index = 0;
Kernel::Thread* thread = Kernel::GetCurrentThread();
bool was_waiting = thread->waitsynch_waited;
thread->waitsynch_waited = false;
// Check if 'handles' is invalid
if (handles == nullptr)
@ -300,90 +301,113 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou
return ResultCode(ErrorDescription::OutOfRange, ErrorModule::OS,
ErrorSummary::InvalidArgument, ErrorLevel::Usage);
// If 'handle_count' is non-zero, iterate through each handle and wait the current thread if
// necessary
if (handle_count != 0) {
bool selected = false; // True once an object has been selected
using ObjectPtr = Kernel::SharedPtr<Kernel::WaitObject>;
Kernel::SharedPtr<Kernel::WaitObject> wait_object;
std::vector<ObjectPtr> objects(handle_count);
for (int i = 0; i < handle_count; ++i) {
auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
if (object == nullptr)
return ERR_INVALID_HANDLE;
// Check if the current thread should wait on this object...
if (object->ShouldWait()) {
// Check we are waiting on all objects...
if (wait_all)
// Wait the thread
wait_thread = true;
} else {
// Do not wait on this object, check if this object should be selected...
if (!wait_all && (!selected || (wait_object == object && was_waiting))) {
// Do not wait the thread
wait_thread = false;
handle_index = i;
wait_object = object;
selected = true;
}
}
}
} else {
// If no handles were passed in, put the thread to sleep only when 'wait_all' is false
// NOTE: This should deadlock the current thread if no timeout was specified
if (!wait_all) {
wait_thread = true;
}
}
SCOPE_EXIT({
HLE::Reschedule("WaitSynchronizationN");
}); // Reschedule after putting the threads to sleep.
// If thread should wait, then set its state to waiting
if (wait_thread) {
// Actually wait the current thread on each object if we decided to wait...
std::vector<SharedPtr<Kernel::WaitObject>> wait_objects;
wait_objects.reserve(handle_count);
for (int i = 0; i < handle_count; ++i) {
auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
object->AddWaitingThread(Kernel::GetCurrentThread());
wait_objects.push_back(object);
}
Kernel::WaitCurrentThread_WaitSynchronization(std::move(wait_objects), true, wait_all);
// Create an event to wake the thread up after the specified nanosecond delay has passed
Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds);
// NOTE: output of this SVC will be set later depending on how the thread resumes
return HLE::RESULT_INVALID;
}
// Acquire objects if we did not wait...
for (int i = 0; i < handle_count; ++i) {
auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
// Acquire the object if it is not waiting...
if (!object->ShouldWait()) {
object->Acquire();
// If this was the first non-waiting object and 'wait_all' is false, don't acquire
// any other objects
if (!wait_all)
break;
}
if (object == nullptr)
return ERR_INVALID_HANDLE;
objects[i] = object;
}
// TODO(bunnei): If 'wait_all' is true, this is probably wrong. However, real hardware does
// not seem to set it to any meaningful value.
*out = handle_count != 0 ? (wait_all ? -1 : handle_index) : 0;
// Clear the mapping of wait object indices
thread->wait_objects_index.clear();
return RESULT_SUCCESS;
if (!wait_all) {
// Find the first object that is acquireable in the provided list of objects
auto itr = std::find_if(objects.begin(), objects.end(), [](const ObjectPtr& object) {
return !object->ShouldWait();
});
if (itr != objects.end()) {
// We found a ready object, acquire it and set the result value
ObjectPtr object = *itr;
object->Acquire();
*out = std::distance(objects.begin(), itr);
return RESULT_SUCCESS;
}
// No objects were ready to be acquired, prepare to suspend the thread.
// If a timeout value of 0 was provided, just return the Timeout error code instead of suspending the thread.
if (nano_seconds == 0) {
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
}
// Put the thread to sleep
thread->status = THREADSTATUS_WAIT_SYNCH;
// Clear the thread's waitlist, we won't use it for wait_all = false
thread->wait_objects.clear();
// Add the thread to each of the objects' waiting threads.
for (int i = 0; i < objects.size(); ++i) {
ObjectPtr object = objects[i];
// Set the index of this object in the mapping of Objects -> index for this thread.
thread->wait_objects_index[object->GetObjectId()] = i;
object->AddWaitingThread(thread);
// TODO(Subv): Perform things like update the mutex lock owner's priority to prevent priority inversion.
}
// Note: If no handles and no timeout were given, then the thread will deadlock, this is consistent with hardware behavior.
// Create an event to wake the thread up after the specified nanosecond delay has passed
thread->WakeAfterDelay(nano_seconds);
// Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects.
// Otherwise we retain the default value of timeout, and -1 in the out parameter
thread->wait_set_output = true;
*out = -1;
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
} else {
bool all_available = std::all_of(objects.begin(), objects.end(), [](const ObjectPtr& object) {
return !object->ShouldWait();
});
if (all_available) {
// We can acquire all objects right now, do so.
for (auto object : objects)
object->Acquire();
// Note: In this case, the `out` parameter is not set, and retains whatever value it had before.
return RESULT_SUCCESS;
}
// Not all objects were available right now, prepare to suspend the thread.
// If a timeout value of 0 was provided, just return the Timeout error code instead of suspending the thread.
if (nano_seconds == 0) {
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
}
// Put the thread to sleep
thread->status = THREADSTATUS_WAIT_SYNCH;
// Set the thread's waitlist to the list of objects passed to WaitSynchronizationN
thread->wait_objects = objects;
// Add the thread to each of the objects' waiting threads.
for (auto object : objects) {
object->AddWaitingThread(thread);
// TODO(Subv): Perform things like update the mutex lock owner's priority to prevent priority inversion.
}
// Create an event to wake the thread up after the specified nanosecond delay has passed
thread->WakeAfterDelay(nano_seconds);
// This value gets set to -1 by default in this case, it is not modified after this.
*out = -1;
// Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects.
return ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
ErrorSummary::StatusChanged,
ErrorLevel::Info);
}
}
/// Create an address arbiter (to allocate access to shared resources)
@ -1148,6 +1172,7 @@ void CallSVC(u32 immediate) {
if (info) {
if (info->func) {
info->func();
HLE::Reschedule(__func__);
} else {
LOG_ERROR(Kernel_SVC, "unimplemented SVC function %s(..)", info->name);
}