/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <assert.h>
#include <memory.h>

#include "runtime_cl_12.h"
#include "scheduler.h"

//
//
//

#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)     \
  fprintf(stderr,                                                   \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",  \
          suballocator->name,                                       \
          suballocator->rem.avail,                                  \
          (skc_uint)ss,                                             \
          subbuf_id,                                                \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)      \
  fprintf(stderr,                                                   \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",  \
          suballocator->name,                                       \
          suballocator->rem.avail,                                  \
          (skc_uint)ss,                                             \
          subbuf_id,                                                \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif

//
//
//

void
skc_suballocator_create(struct skc_runtime      * const runtime,
                        struct skc_suballocator * const suballocator,
                        char              const * const name,
                        skc_uint                  const subbufs,
                        size_t                    const align,
                        size_t                    const size)
{
  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;

  // allocate array of subbuf records
  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);

  // zero subbufs
  memset(suballocator->subbufs,0,subbufs_size);

  // initialize starting subbuf
  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;

  // allocate array of ids
  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
                                                  SKC_MEM_FLAGS_READ_WRITE,
                                                  sizeof(*suballocator->ids) * subbufs);
  for (skc_uint ii=0; ii<subbufs; ii++)
    suballocator->ids[ii] = ii;

  suballocator->rem.avail = 1;
  suballocator->rem.spare = subbufs - 1;

  suballocator->align     = (skc_uint)align;
  suballocator->count     = subbufs;

  suballocator->size      = (skc_subbuf_size_t)size;
  suballocator->total     = 0;

  suballocator->name      = name;
}

void
skc_suballocator_dispose(struct skc_runtime      * const runtime,
                         struct skc_suballocator * const suballocator)
{
  skc_runtime_host_perm_free(runtime,suballocator->ids);
  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
}

//
// Sets id and returns origin
//

size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for the
  // availability of resources while a fully utilized GPU is making
  // forward progress on kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //

  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);

  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck to see there is at least one region of memory
  // available but do not check to see if there is a spare.  Instead,
  // we simply keep looking for an exact fit.
  //
  skc_subbuf_id_t * const ids = suballocator->ids;

  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
              suballocator->total += size_ru;

              skc_uint            const spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;
              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}

//
// FIXME -- simplify this with a merge-with-prev() primitive
//

void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t                 subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;

  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;

          //
          // both subbuf and next are now spare which means we need to
          // move final available subbuffer into next's old position
          // unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx] = last_id;
              last->idx                   = next_idx;
            }

          skc_subbuf_id_t const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint        const spare_rem = suballocator->rem.spare + 2;
          skc_uint        const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare          = spare_rem;
          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev    = prev;
      next->origin  = subbuf->origin;
      next->size   += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}

//
//
//

#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//

static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif

//
//
//
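
//
// A minimal sketch of the merge-with-prev() primitive suggested by the
// FIXME above skc_suballocator_subbuf_free() -- this is an illustrative
// assumption, not part of the current implementation, and the helper
// name skc_subbuf_merge_with_prev() is hypothetical.  It only folds a
// subbuffer into an idle left-hand neighbor; the caller would still be
// responsible for recycling the freed id onto the spare stack and for
// the ids[]/idx bookkeeping performed in skc_suballocator_subbuf_free().
//
#if 0

static
void
skc_subbuf_merge_with_prev(struct skc_subbuf * const prev,
                           struct skc_subbuf * const subbuf)
{
  assert(prev->inuse   == 0);
  assert(subbuf->prev  == prev);

  // grow prev to cover subbuf's extent -- prev->origin is unchanged
  prev->size += subbuf->size;

  // unlink subbuf from the doubly-linked list
  prev->next  = subbuf->next;

  if (subbuf->next != NULL)
    subbuf->next->prev = prev;
}

#endif

//
//
//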