// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Malloc small size classes. // // See malloc.go for overview. // // The size classes are chosen so that rounding an allocation // request up to the next size class wastes at most 12.5% (1.125x). // // Each size class has its own page count that gets allocated // and chopped up when new objects of the size class are needed. // That page count is chosen so that chopping up the run of // pages into objects of the given size wastes at most 12.5% (1.125x) // of the memory. It is not necessary that the cutoff here be // the same as above. // // The two sources of waste multiply, so the worst possible case // for the above constraints would be that allocations of some // size might have a 26.6% (1.266x) overhead. // In practice, only one of the wastes comes into play for a // given size (sizes < 512 waste mainly on the round-up, // sizes > 512 waste mainly on the page chopping). // // TODO(rsc): Compute max waste for any given size. package runtime // Size classes. Computed and initialized by InitSizes. // // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, // 1 <= sizeclass < NumSizeClasses, for n. // Size class 0 is reserved to mean "not small". // // class_to_size[i] = largest size in class i // class_to_allocnpages[i] = number of pages to allocate when // making new objects in class i // The SizeToClass lookup is implemented using two arrays, // one mapping sizes <= 1024 to their class and one mapping // sizes >= 1024 and <= MaxSmallSize to their class. // All objects are 8-aligned, so the first array is indexed by // the size divided by 8 (rounded up). Objects >= 1024 bytes // are 128-aligned, so the second array is indexed by the // size divided by 128 (rounded up). The arrays are filled in // by InitSizes. var class_to_size [_NumSizeClasses]int32 var class_to_allocnpages [_NumSizeClasses]int32 var class_to_divmagic [_NumSizeClasses]divMagic var size_to_class8 [1024/8 + 1]int8 var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 func sizeToClass(size int32) int32 { if size > _MaxSmallSize { throw("SizeToClass - invalid size") } if size > 1024-8 { return int32(size_to_class128[(size-1024+127)>>7]) } return int32(size_to_class8[(size+7)>>3]) } func initSizes() { // Initialize the runtime·class_to_size table (and choose class sizes in the process). class_to_size[0] = 0 sizeclass := 1 // 0 means no class align := 8 for size := align; size <= _MaxSmallSize; size += align { if size&(size-1) == 0 { // bump alignment once in a while if size >= 2048 { align = 256 } else if size >= 128 { align = size / 8 } else if size >= 16 { align = 16 // required for x86 SSE instructions, if we want to use them } } if align&(align-1) != 0 { throw("InitSizes - bug") } // Make the allocnpages big enough that // the leftover is less than 1/8 of the total, // so wasted space is at most 12.5%. allocsize := _PageSize for allocsize%size > allocsize/8 { allocsize += _PageSize } npages := allocsize >> _PageShift // If the previous sizeclass chose the same // allocation size and fit the same number of // objects into the page, we might as well // use just this size instead of having two // different sizes. if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { class_to_size[sizeclass-1] = int32(size) continue } class_to_allocnpages[sizeclass] = int32(npages) class_to_size[sizeclass] = int32(size) sizeclass++ } if sizeclass != _NumSizeClasses { print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") throw("InitSizes - bad NumSizeClasses") } // Initialize the size_to_class tables. nextsize := 0 for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { size_to_class8[nextsize/8] = int8(sizeclass) } if nextsize >= 1024 { for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { size_to_class128[(nextsize-1024)/128] = int8(sizeclass) } } } // Double-check SizeToClass. if false { for n := int32(0); n < _MaxSmallSize; n++ { sizeclass := sizeToClass(n) if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") print("incorrect SizeToClass\n") goto dump } if sizeclass > 1 && class_to_size[sizeclass-1] >= n { print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") print("SizeToClass too big\n") goto dump } } } testdefersizes() // Copy out for statistics table. for i := 0; i < len(class_to_size); i++ { memstats.by_size[i].size = uint32(class_to_size[i]) } for i := 1; i < len(class_to_size); i++ { class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) } return dump: if true { print("NumSizeClasses=", _NumSizeClasses, "\n") print("runtime·class_to_size:") for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { print(" ", class_to_size[sizeclass], "") } print("\n\n") print("size_to_class8:") for i := 0; i < len(size_to_class8); i++ { print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") } print("\n") print("size_to_class128:") for i := 0; i < len(size_to_class128); i++ { print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") } print("\n") } throw("InitSizes failed") } // Returns size of the memory block that mallocgc will allocate if you ask for the size. func roundupsize(size uintptr) uintptr { if size < _MaxSmallSize { if size <= 1024-8 { return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) } else { return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) } } if size+_PageSize < size { return size } return round(size, _PageSize) } // divMagic holds magic constants to implement division // by a particular constant as a shift, multiply, and shift. // That is, given // m = computeMagic(d) // then // n/d == ((n>>m.shift) * m.mul) >> m.shift2 // // The magic computation picks m such that // d = d₁*d₂ // d₂= 2^m.shift // m.mul = ⌈2^m.shift2 / d₁⌉ // // The magic computation here is tailored for malloc block sizes // and does not handle arbitrary d correctly. Malloc block sizes d are // always even, so the first shift implements the factors of 2 in d // and then the mul and second shift implement the odd factor // that remains. Because the first shift divides n by at least 2 (actually 8) // before the multiply gets involved, the huge corner cases that // require additional adjustment are impossible, so the usual // fixup is not needed. // // For more details see Hacker's Delight, Chapter 10, and // http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html // http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html type divMagic struct { shift uint8 mul uint32 shift2 uint8 baseMask uintptr } func computeDivMagic(d uint32) divMagic { var m divMagic // If the size is a power of two, heapBitsForObject can divide even faster by masking. // Compute this mask. if d&(d-1) == 0 { // It is a power of 2 (assuming dinptr != 1) m.baseMask = ^(uintptr(d) - 1) } else { m.baseMask = 0 } // Compute pre-shift by factoring power of 2 out of d. for d&1 == 0 { m.shift++ d >>= 1 } // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int. // This is always a good enough approximation. // We could use smaller k for some divisors but there's no point. k := uint8(63) d64 := uint64(d) for ((1<<k)+d64-1)/d64 >= 1<<32 { k-- } m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉ m.shift2 = k return m }