/*--------------------------------------------------------------------*/ /*--- Linux ticket lock implementation ticket-lock-linux.c ---*/ /*--- ---*/ /*--- Guarantees fair scheduling even if multiple threads are ---*/ /*--- runnable at the same time on a multicore system. Has been ---*/ /*--- observed to cause a slow-down compared to the generic ---*/ /*--- scheduler lock with CPU frequency scaling enabled. Makes ---*/ /*--- Valgrind slightly faster if CPU frequency scaling has been ---*/ /*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/ /*--------------------------------------------------------------------*/ /* This file is part of Valgrind, a dynamic binary instrumentation framework. Copyright (C) 2011-2015 Bart Van Assche <bvanassche@acm.org>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING. 
*/ #include "pub_core_basics.h" #include "pub_core_libcassert.h" #include "pub_core_libcbase.h" // VG_(memset)() #include "pub_core_libcprint.h" #include "pub_core_syscall.h" #include "pub_core_vki.h" #include "pub_core_vkiscnums.h" // __NR_futex #include "pub_core_libcproc.h" #include "pub_core_mallocfree.h" #include "pub_core_threadstate.h" #include "pub_core_inner.h" #if defined(ENABLE_INNER_CLIENT_REQUEST) #include "helgrind/helgrind.h" #endif #include "priv_sched-lock.h" #include "priv_sched-lock-impl.h" #define TL_FUTEX_COUNT_LOG2 4 #define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2) #define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1) struct sched_lock { volatile unsigned head; volatile unsigned tail; volatile unsigned futex[TL_FUTEX_COUNT]; int owner; }; #if 1 static Bool s_debug; #else static Bool s_debug = True; #endif static const HChar *get_sched_lock_name(void) { return "ticket lock"; } static struct sched_lock *create_sched_lock(void) { struct sched_lock *p; p = VG_(malloc)("sched_lock", sizeof(*p)); // The futex syscall requires that a futex takes four bytes. vg_assert(sizeof(p->futex[0]) == 4); VG_(memset)(p, 0, sizeof(*p)); INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p)); INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), "")); return p; } static void destroy_sched_lock(struct sched_lock *p) { INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p)); VG_(free)(p); } static int get_sched_lock_owner(struct sched_lock *p) { return p->owner; } /* * Acquire ticket lock. Increment the tail of the queue and use the original * value as the ticket value. Wait until the head of the queue equals the * ticket value. The futex used to wait depends on the ticket value in order * to avoid that all threads get woken up every time a ticket lock is * released. That last effect is sometimes called the "thundering herd" * effect. * * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list * (http://lkml.org/lkml/2007/11/1/125) for more info. 
*/
/*
 * Error handling: a FUTEX_WAIT that fails with EAGAIN (the futex word no
 * longer holds the expected value) or with EINTR (the wait was interrupted
 * by a signal) is not an error for this algorithm. In both cases the loop
 * simply re-reads the futex word and the queue head and waits again if
 * necessary. Any other error is fatal.
 */
static void acquire_sched_lock(struct sched_lock *p)
{
   unsigned ticket, futex_value;
   volatile unsigned *futex;
   SysRes sres;

   /* Take a ticket and pick the futex word associated with it. */
   ticket = __sync_fetch_and_add(&p->tail, 1);
   futex = &p->futex[ticket & TL_FUTEX_MASK];
   if (s_debug) {
      VG_(printf)("[%d/%d] acquire: ticket %u\n", VG_(getpid)(),
                  VG_(gettid)(), ticket);
   }
   for (;;) {
      /*
       * Sample the futex word before looking at head. If the lock is
       * handed to this ticket after the head check below, the releaser
       * will have changed the futex word in the meantime, so the
       * FUTEX_WAIT fails with EAGAIN instead of sleeping forever.
       */
      futex_value = *futex;
      __sync_synchronize();
      if (ticket == p->head) {
         break;
      }
      if (s_debug) {
         VG_(printf)("[%d/%d] acquire: ticket %u - waiting until"
                     " futex[%ld] != %u\n", VG_(getpid)(),
                     VG_(gettid)(), ticket, (long)(futex - p->futex),
                     futex_value);
      }
      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
                              VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
                              futex_value);
      /* EAGAIN and EINTR just mean "check again"; see the note above. */
      if (sr_isError(sres) && sr_Err(sres) != VKI_EAGAIN
          && sr_Err(sres) != VKI_EINTR) {
         VG_(printf)("futex_wait() returned error code %lu\n", sr_Err(sres));
         vg_assert(False);
      }
   }
   __sync_synchronize();
   INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
   /* The previous holder must have cleared owner before passing the lock. */
   vg_assert(p->owner == 0);
   p->owner = VG_(gettid)();
}

/*
 * Release a ticket lock by incrementing the head of the queue. Only generate
 * a thread wakeup signal if at least one thread is waiting. If the queue tail
 * matches the wakeup_ticket value, no threads have to be woken up.
 *
 * Note: tail will only be read after head has been incremented since both are
 * declared as volatile and since the __sync...() functions imply a memory
 * barrier.
*/ static void release_sched_lock(struct sched_lock *p) { unsigned wakeup_ticket, futex_value; volatile unsigned *futex; SysRes sres; vg_assert(p->owner != 0); p->owner = 0; INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1)); wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1; if (p->tail != wakeup_ticket) { futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK]; futex_value = __sync_fetch_and_add(futex, 1); if (s_debug) VG_(printf)("[%d/%d] release: waking up ticket %u (futex[%ld] = %u)" "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket, (long)(futex - p->futex), futex_value); sres = VG_(do_syscall3)(__NR_futex, (UWord)futex, VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG, 0x7fffffff); vg_assert(!sr_isError(sres)); } else { if (s_debug) VG_(printf)("[%d/%d] release: no thread is waiting for ticket %u\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket); } } const struct sched_lock_ops ML_(linux_ticket_lock_ops) = { .get_sched_lock_name = get_sched_lock_name, .create_sched_lock = create_sched_lock, .destroy_sched_lock = destroy_sched_lock, .get_sched_lock_owner = get_sched_lock_owner, .acquire_sched_lock = acquire_sched_lock, .release_sched_lock = release_sched_lock, };