Arachne  0.1
Arachne.h
1 /* Copyright (c) 2015-2016 Stanford University
2  *
3  * Permission to use, copy, modify, and distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
10  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14  */
15 
16 #ifndef ARACHNE_H_
17 #define ARACHNE_H_
18 
19 #include <assert.h>
20 #include <stdio.h>
21 #include <functional>
22 #include <vector>
23 #include <mutex>
24 #include <deque>
25 #include <atomic>
26 #include <queue>
27 #include <string>
28 
29 
30 #include "../PerfUtils/Cycles.h"
31 #include "Common.h"
32 
33 namespace Arachne {
34 
35 using PerfUtils::Cycles;
36 
37 // Forward declare to break circular dependency between ThreadContext and
38 // ConditionVariable
39 struct ThreadContext;
40 
41 // This is used in createThread.
42 extern volatile uint32_t numCores;
43 extern volatile uint32_t maxNumCores;
44 
45 // Used in inline functions.
46 extern FILE* errorStream;
47 void dispatch();
48 
64 struct ThreadId {
69  uint32_t generation;
70 
77  ThreadId(ThreadContext* context, uint32_t generation)
78  : context(context)
79  , generation(generation) { }
80 
81  ThreadId()
82  : context(NULL)
83  , generation(0) { }
84 
87  bool
88  operator==(const ThreadId& other) const {
89  return context == other.context && generation == other.generation;
90  }
91 
93  bool
94  operator!=(const ThreadId& other) const {
95  return !(*this == other);
96  }
97 };
98 
99 void init(int* argcp = NULL, const char** argv = NULL);
100 void shutDown();
101 void waitForTermination();
102 void yield();
103 void sleep(uint64_t ns);
104 
109 inline void block() {
110  dispatch();
111 }
112 void signal(ThreadId id);
113 void join(ThreadId id);
115 
116 void setErrorStream(FILE* ptr);
117 void testInit();
118 void testDestroy();
119 
123 class SpinLock {
124  public:
126  explicit SpinLock(std::string name) : state(false), name(name) {}
127  SpinLock() : state(false), name("unnamed") {}
128  ~SpinLock(){}
129 
131  inline void
132  lock() {
133  uint64_t startOfContention = 0;
134  while (state.exchange(true, std::memory_order_acquire) != false) {
135  if (startOfContention == 0) {
136  startOfContention = Cycles::rdtsc();
137  } else {
138  uint64_t now = Cycles::rdtsc();
139  if (Cycles::toSeconds(now - startOfContention) > 1.0) {
140  fprintf(errorStream,
141  "%s SpinLock locked for one second; deadlock?\n",
142  name.c_str());
143  startOfContention = now;
144  }
145  }
146  yield();
147  }
148  }
149 
155  inline bool
157  // If the original value was false, then we successfully acquired the
158  // lock. Otherwise we failed.
159  return !state.exchange(true, std::memory_order_acquire);
160  }
161 
163  inline void
164  unlock() {
165  state.store(false, std::memory_order_release);
166  }
167 
169  inline void
170  setName(std::string name) {
171  this->name = name;
172  }
173 
174  private:
175  // Implements the lock: false means free, true means locked
176  std::atomic<bool> state;
177 
178  // Descriptive name for this SpinLock. Used to identify the purpose of
179  // the lock, what it protects, where it exists in the codebase, etc.
180  //
181  // Used to identify the lock when reporting a potential deadlock.
182  std::string name;
183 };
184 
190  public:
193  void notifyOne();
194  void notifyAll();
195  template <typename LockType> void wait(LockType& lock);
196  template <typename LockType> void waitFor(LockType& lock, uint64_t ns);
197  private:
198  // Ordered collection of threads that are waiting on this condition
199  // variable. Threads are processed from this list in FIFO order when a
200  // notifyOne() is called.
201  std::deque<ThreadId> blockedThreads;
202  DISALLOW_COPY_AND_ASSIGN(ConditionVariable);
203 };
204 
215 // The declarations in following section are private to the thread library.
218 
226  virtual void runThread() = 0;
227  virtual ~ThreadInvocationEnabler() { }
228 };
229 
244 template<typename F>
248 
251  explicit ThreadInvocation(F mainFunction)
252  : mainFunction(mainFunction) {
253  static_assert(sizeof(ThreadInvocation<F>) <= CACHE_LINE_SIZE,
254  "Arachne requires the function and arguments for a thread to "
255  "fit within one cache line.");
256  }
257 
260  void
262  mainFunction();
263  }
264 };
265 
272  void* stack;
273 
276  void* sp;
277 
283  volatile uint64_t wakeupTimeInCycles;
284 
288  uint32_t generation;
289 
293 
297 
301  uint8_t idInCore;
302 
308 
310  struct alignas(CACHE_LINE_SIZE) {
311  char data[CACHE_LINE_SIZE];
312  }
315 
316  ThreadContext() = delete;
317  ThreadContext(ThreadContext&) = delete;
318 
319  explicit ThreadContext(uint8_t idInCore);
320 };
321 
322 // Largest number of Arachne threads that can be simultaneously created on each
323 // core.
324 const int maxThreadsPerCore = 56;
325 
331 const size_t SpaceForSavedRegisters = 48;
332 
336 const uint64_t BLOCKED = ~0L;
337 
342 const uint64_t UNOCCUPIED = ~0L - 1;
343 
344 void schedulerMainLoop();
345 void swapcontext(void **saved, void **target);
346 void threadMain(int id);
347 
348 extern thread_local int kernelThreadId;
349 extern thread_local ThreadContext *loadedContext;
350 extern thread_local ThreadContext** localThreadContexts;
351 extern std::vector<ThreadContext**> allThreadContexts;
352 
359  uint64_t occupied : 56;
361  uint8_t numOccupied : 8;
362 };
363 
364 extern std::vector< std::atomic<MaskAndCount> *> occupiedAndCount;
365 extern thread_local std::atomic<MaskAndCount> *localOccupiedAndCount;
366 
367 #ifdef TEST
368 static std::deque<uint64_t> mockRandomValues;
369 #endif
370 
374 inline uint64_t
375 random(void) {
376 #ifdef TEST
377  if (!mockRandomValues.empty()) {
378  uint64_t returnValue = mockRandomValues.front();
379  mockRandomValues.pop_front();
380  return returnValue;
381  }
382 #endif
383 
384  // This function came from the following site.
385  // http://stackoverflow.com/a/1640399/391161
386  //
387  // It was chosen because it was advertised to be fast, but this fact has
388  // not yet been verified or disproved through experiments.
389  static uint64_t x = 123456789, y = 362436069, z = 521288629;
390  uint64_t t;
391  x ^= x << 16;
392  x ^= x >> 5;
393  x ^= x << 1;
394 
395  t = x;
396  x = y;
397  y = z;
398  z = t ^ x ^ y;
399 
400  return z;
401 }
402 
423 template<typename _Callable, typename... _Args>
424 ThreadId
425 createThread(int kId, _Callable&& __f, _Args&&... __args) {
426  if (kId == -1)
427  kId = kernelThreadId;
428 
429  auto task = std::bind(
430  std::forward<_Callable>(__f), std::forward<_Args>(__args)...);
431 
432  bool success;
433  uint32_t index;
434  do {
435  // Each iteration through this loop makes one attempt to enqueue the
436  // task to the specified core. Multiple iterations are required only if
437  // there is contention for the core's state variables.
438  MaskAndCount slotMap = *occupiedAndCount[kId];
439  MaskAndCount oldSlotMap = slotMap;
440 
441  if (slotMap.numOccupied >= maxThreadsPerCore)
442  return NullThread;
443 
444  // Search for a non-occupied slot and attempt to reserve the slot
445  index = 0;
446  while ((slotMap.occupied & (1L << index)) && index < maxThreadsPerCore)
447  index++;
448 
449  slotMap.occupied =
450  (slotMap.occupied | (1L << index)) & 0x00FFFFFFFFFFFFFF;
451  slotMap.numOccupied++;
452 
453  success = occupiedAndCount[kId]->compare_exchange_strong(oldSlotMap,
454  slotMap);
455  } while (!success);
456 
457  // Copy the thread invocation into the byte array.
458  new (&allThreadContexts[kId][index]->threadInvocation)
459  Arachne::ThreadInvocation<decltype(task)>(task);
460  allThreadContexts[kId][index]->wakeupTimeInCycles = 0;
461  return ThreadId(allThreadContexts[kId][index],
462  allThreadContexts[kId][index]->generation);
463 }
464 
466 // The ends the private section of the thread library.
468 
485 template<typename _Callable, typename... _Args>
486 ThreadId
487 createThread(_Callable&& __f, _Args&&... __args) {
488  // Find a kernel thread to enqueue to by picking two at random and choosing
489  // the one with the fewest Arachne threads.
490  int kId;
491  int choice1 = static_cast<int>(random()) % numCores;
492  int choice2 = static_cast<int>(random()) % numCores;
493  while (choice2 == choice1 && numCores > 1)
494  choice2 = static_cast<int>(random()) % numCores;
495 
496  if (occupiedAndCount[choice1]->load().numOccupied <
497  occupiedAndCount[choice2]->load().numOccupied)
498  kId = choice1;
499  else
500  kId = choice2;
501 
502  return createThread(kId, __f, __args...);
503 }
504 
514 template <typename LockType> void
515 ConditionVariable::wait(LockType& lock) {
516 #if TIME_TRACE
517  TimeTrace::record("Wait on Core %d", kernelThreadId);
518 #endif
519  blockedThreads.push_back(
520  ThreadId(loadedContext, loadedContext->generation));
521  lock.unlock();
522  dispatch();
523 #if TIME_TRACE
524  TimeTrace::record("About to acquire lock after waking up");
525 #endif
526  lock.lock();
527 }
528 
541 template <typename LockType> void
542 ConditionVariable::waitFor(LockType& lock, uint64_t ns) {
543  blockedThreads.push_back(
544  ThreadId(loadedContext, loadedContext->generation));
545  loadedContext->wakeupTimeInCycles =
546  Cycles::rdtsc() + Cycles::fromNanoseconds(ns);
547  lock.unlock();
548  dispatch();
549  lock.lock();
550 }
551 
552 } // namespace Arachne
553 
554 #endif // ARACHNE_H_
Definition: Arachne.cc:23
ThreadId getThreadId()
Definition: Arachne.cc:310
void testDestroy()
Definition: Arachne.cc:660
uint8_t numOccupied
The number of 1 bits in occupied.
Definition: Arachne.h:361
uint64_t occupied
Definition: Arachne.h:359
const Arachne::ThreadId NullThread
Definition: Arachne.h:212
void unlock()
Definition: Arachne.h:164
void join(ThreadId id)
Definition: Arachne.cc:403
volatile uint64_t wakeupTimeInCycles
Definition: Arachne.h:283
bool operator==(const ThreadId &other) const
Definition: Arachne.h:88
void * sp
Definition: Arachne.h:276
Definition: Arachne.h:223
Definition: Arachne.h:189
uint8_t idInCore
Definition: Arachne.h:301
void waitFor(LockType &lock, uint64_t ns)
Definition: Arachne.h:542
void yield()
Definition: Arachne.cc:284
SpinLock joinLock
Definition: Arachne.h:292
void lock()
Definition: Arachne.h:132
bool try_lock()
Definition: Arachne.h:156
ThreadContext * context
The storage where this thread&#39;s state is held.
Definition: Arachne.h:66
void init(int *argcp, const char **argv)
Definition: Arachne.cc:582
void waitForTermination()
Definition: Arachne.cc:418
Definition: Arachne.h:245
Definition: Arachne.h:64
void setName(std::string name)
Definition: Arachne.h:170
threadInvocation
Definition: Arachne.h:314
bool operator!=(const ThreadId &other) const
Negation of the function above.
Definition: Arachne.h:94
SpinLock(std::string name)
Definition: Arachne.h:126
uint32_t generation
Definition: Arachne.h:69
F mainFunction
The top-level function of the Arachne thread.
Definition: Arachne.h:247
void runThread()
Definition: Arachne.h:261
This structure tracks the live threads on a single core.
Definition: Arachne.h:354
void sleep(uint64_t ns)
Definition: Arachne.cc:296
uint32_t generation
Definition: Arachne.h:288
Definition: Arachne.h:123
ConditionVariable joinCV
Definition: Arachne.h:296
void * stack
Definition: Arachne.h:272
ThreadId createThread(_Callable &&__f, _Args &&...__args)
Definition: Arachne.h:487
Definition: Arachne.h:269
void shutDown()
Definition: Arachne.cc:683
ThreadInvocation(F mainFunction)
Definition: Arachne.h:251
void wait(LockType &lock)
Definition: Arachne.h:515
void setErrorStream(FILE *stream)
Definition: Arachne.cc:721
ThreadId(ThreadContext *context, uint32_t generation)
Definition: Arachne.h:77
void block()
Definition: Arachne.h:109
void signal(ThreadId id)
Definition: Arachne.cc:383
void testInit()
Definition: Arachne.cc:635