// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Identify mismatches between assembly files and Go func declarations.
package main
import (
"bytes"
"fmt"
"go/ast"
"go/build"
"go/token"
"go/types"
"regexp"
"strconv"
"strings"
)
// 'kind' is a kind of assembly variable.
// The kinds 1, 2, 4, 8 stand for values of that size.
type asmKind int
// These special kinds are not valid sizes.
const (
asmString asmKind = 100 + iota
asmSlice
asmArray
asmInterface
asmEmptyInterface
asmStruct
asmComplex
)
// An asmArch describes assembly parameters for an architecture
type asmArch struct {
name string
bigEndian bool
stack string
lr bool
// calculated during initialization
sizes types.Sizes
intSize int
ptrSize int
maxAlign int
}
// An asmFunc describes the expected variables for a function on a given architecture.
type asmFunc struct {
arch *asmArch
size int // size of all arguments
vars map[string]*asmVar
varByOffset map[int]*asmVar
}
// An asmVar describes a single assembly variable.
type asmVar struct {
name string
kind asmKind
typ string
off int
size int
inner []*asmVar
}
var (
asmArch386 = asmArch{name: "386", bigEndian: false, stack: "SP", lr: false}
asmArchArm = asmArch{name: "arm", bigEndian: false, stack: "R13", lr: true}
asmArchArm64 = asmArch{name: "arm64", bigEndian: false, stack: "RSP", lr: true}
asmArchAmd64 = asmArch{name: "amd64", bigEndian: false, stack: "SP", lr: false}
asmArchAmd64p32 = asmArch{name: "amd64p32", bigEndian: false, stack: "SP", lr: false}
asmArchMips = asmArch{name: "mips", bigEndian: true, stack: "R29", lr: true}
asmArchMipsLE = asmArch{name: "mipsle", bigEndian: false, stack: "R29", lr: true}
asmArchMips64 = asmArch{name: "mips64", bigEndian: true, stack: "R29", lr: true}
asmArchMips64LE = asmArch{name: "mips64le", bigEndian: false, stack: "R29", lr: true}
asmArchPpc64 = asmArch{name: "ppc64", bigEndian: true, stack: "R1", lr: true}
asmArchPpc64LE = asmArch{name: "ppc64le", bigEndian: false, stack: "R1", lr: true}
asmArchS390X = asmArch{name: "s390x", bigEndian: true, stack: "R15", lr: true}
arches = []*asmArch{
&asmArch386,
&asmArchArm,
&asmArchArm64,
&asmArchAmd64,
&asmArchAmd64p32,
&asmArchMips,
&asmArchMipsLE,
&asmArchMips64,
&asmArchMips64LE,
&asmArchPpc64,
&asmArchPpc64LE,
&asmArchS390X,
}
)
func init() {
for _, arch := range arches {
arch.sizes = types.SizesFor("gc", arch.name)
if arch.sizes == nil {
panic("missing SizesFor for gc/" + arch.name)
}
arch.intSize = int(arch.sizes.Sizeof(types.Typ[types.Int]))
arch.ptrSize = int(arch.sizes.Sizeof(types.Typ[types.UnsafePointer]))
arch.maxAlign = int(arch.sizes.Alignof(types.Typ[types.Int64]))
}
}
var (
re = regexp.MustCompile
asmPlusBuild = re(`//\s+\+build\s+([^\n]+)`)
asmTEXT = re(`\bTEXT\b(.*)·([^\(]+)\(SB\)(?:\s*,\s*([0-9A-Z|+()]+))?(?:\s*,\s*\$(-?[0-9]+)(?:-([0-9]+))?)?`)
asmDATA = re(`\b(DATA|GLOBL)\b`)
asmNamedFP = re(`([a-zA-Z0-9_\xFF-\x{10FFFF}]+)(?:\+([0-9]+))\(FP\)`)
asmUnnamedFP = re(`[^+\-0-9](([0-9]+)\(FP\))`)
asmSP = re(`[^+\-0-9](([0-9]+)\(([A-Z0-9]+)\))`)
asmOpcode = re(`^\s*(?:[A-Z0-9a-z_]+:)?\s*([A-Z]+)\s*([^,]*)(?:,\s*(.*))?`)
ppc64Suff = re(`([BHWD])(ZU|Z|U|BR)?$`)
)
func asmCheck(pkg *Package) {
if !vet("asmdecl") {
return
}
// No work if no assembly files.
if !pkg.hasFileWithSuffix(".s") {
return
}
// Gather declarations. knownFunc[name][arch] is func description.
knownFunc := make(map[string]map[string]*asmFunc)
for _, f := range pkg.files {
if f.file != nil {
for _, decl := range f.file.Decls {
if decl, ok := decl.(*ast.FuncDecl); ok && decl.Body == nil {
knownFunc[decl.Name.Name] = f.asmParseDecl(decl)
}
}
}
}
Files:
for _, f := range pkg.files {
if !strings.HasSuffix(f.name, ".s") {
continue
}
Println("Checking file", f.name)
// Determine architecture from file name if possible.
var arch string
var archDef *asmArch
for _, a := range arches {
if strings.HasSuffix(f.name, "_"+a.name+".s") {
arch = a.name
archDef = a
break
}
}
lines := strings.SplitAfter(string(f.content), "\n")
var (
fn *asmFunc
fnName string
localSize, argSize int
wroteSP bool
haveRetArg bool
retLine []int
)
flushRet := func() {
if fn != nil && fn.vars["ret"] != nil && !haveRetArg && len(retLine) > 0 {
v := fn.vars["ret"]
for _, line := range retLine {
f.Badf(token.NoPos, "%s:%d: [%s] %s: RET without writing to %d-byte ret+%d(FP)", f.name, line, arch, fnName, v.size, v.off)
}
}
retLine = nil
}
for lineno, line := range lines {
lineno++
badf := func(format string, args ...interface{}) {
f.Badf(token.NoPos, "%s:%d: [%s] %s: %s", f.name, lineno, arch, fnName, fmt.Sprintf(format, args...))
}
if arch == "" {
// Determine architecture from +build line if possible.
if m := asmPlusBuild.FindStringSubmatch(line); m != nil {
// There can be multiple architectures in a single +build line,
// so accumulate them all and then prefer the one that
// matches build.Default.GOARCH.
var archCandidates []*asmArch
for _, fld := range strings.Fields(m[1]) {
for _, a := range arches {
if a.name == fld {
archCandidates = append(archCandidates, a)
}
}
}
for _, a := range archCandidates {
if a.name == build.Default.GOARCH {
archCandidates = []*asmArch{a}
break
}
}
if len(archCandidates) > 0 {
arch = archCandidates[0].name
archDef = archCandidates[0]
}
}
}
if m := asmTEXT.FindStringSubmatch(line); m != nil {
flushRet()
if arch == "" {
// Arch not specified by filename or build tags.
// Fall back to build.Default.GOARCH.
for _, a := range arches {
if a.name == build.Default.GOARCH {
arch = a.name
archDef = a
break
}
}
if arch == "" {
f.Warnf(token.NoPos, "%s: cannot determine architecture for assembly file", f.name)
continue Files
}
}
fnName = m[2]
if pkgName := strings.TrimSpace(m[1]); pkgName != "" {
pathParts := strings.Split(pkgName, "∕")
pkgName = pathParts[len(pathParts)-1]
if pkgName != f.pkg.path {
f.Warnf(token.NoPos, "%s:%d: [%s] cannot check cross-package assembly function: %s is in package %s", f.name, lineno, arch, fnName, pkgName)
fn = nil
fnName = ""
continue
}
}
fn = knownFunc[fnName][arch]
if fn != nil {
size, _ := strconv.Atoi(m[5])
flag := m[3]
if size != fn.size && (flag != "7" && !strings.Contains(flag, "NOSPLIT") || size != 0) {
badf("wrong argument size %d; expected $...-%d", size, fn.size)
}
}
localSize, _ = strconv.Atoi(m[4])
localSize += archDef.intSize
if archDef.lr {
// Account for caller's saved LR
localSize += archDef.intSize
}
argSize, _ = strconv.Atoi(m[5])
if fn == nil && !strings.Contains(fnName, "<>") {
badf("function %s missing Go declaration", fnName)
}
wroteSP = false
haveRetArg = false
continue
} else if strings.Contains(line, "TEXT") && strings.Contains(line, "SB") {
// function, but not visible from Go (didn't match asmTEXT), so stop checking
flushRet()
fn = nil
fnName = ""
continue
}
if strings.Contains(line, "RET") {
retLine = append(retLine, lineno)
}
if fnName == "" {
continue
}
if asmDATA.FindStringSubmatch(line) != nil {
fn = nil
}
if archDef == nil {
continue
}
if strings.Contains(line, ", "+archDef.stack) || strings.Contains(line, ",\t"+archDef.stack) {
wroteSP = true
continue
}
for _, m := range asmSP.FindAllStringSubmatch(line, -1) {
if m[3] != archDef.stack || wroteSP {
continue
}
off := 0
if m[1] != "" {
off, _ = strconv.Atoi(m[2])
}
if off >= localSize {
if fn != nil {
v := fn.varByOffset[off-localSize]
if v != nil {
badf("%s should be %s+%d(FP)", m[1], v.name, off-localSize)
continue
}
}
if off >= localSize+argSize {
badf("use of %s points beyond argument frame", m[1])
continue
}
badf("use of %s to access argument frame", m[1])
}
}
if fn == nil {
continue
}
for _, m := range asmUnnamedFP.FindAllStringSubmatch(line, -1) {
off, _ := strconv.Atoi(m[2])
v := fn.varByOffset[off]
if v != nil {
badf("use of unnamed argument %s; offset %d is %s+%d(FP)", m[1], off, v.name, v.off)
} else {
badf("use of unnamed argument %s", m[1])
}
}
for _, m := range asmNamedFP.FindAllStringSubmatch(line, -1) {
name := m[1]
off := 0
if m[2] != "" {
off, _ = strconv.Atoi(m[2])
}
if name == "ret" || strings.HasPrefix(name, "ret_") {
haveRetArg = true
}
v := fn.vars[name]
if v == nil {
// Allow argframe+0(FP).
if name == "argframe" && off == 0 {
continue
}
v = fn.varByOffset[off]
if v != nil {
badf("unknown variable %s; offset %d is %s+%d(FP)", name, off, v.name, v.off)
} else {
badf("unknown variable %s", name)
}
continue
}
asmCheckVar(badf, fn, line, m[0], off, v)
}
}
flushRet()
}
}
func asmKindForType(t types.Type, size int) asmKind {
switch t := t.Underlying().(type) {
case *types.Basic:
switch t.Kind() {
case types.String:
return asmString
case types.Complex64, types.Complex128:
return asmComplex
}
return asmKind(size)
case *types.Pointer, *types.Chan, *types.Map, *types.Signature:
return asmKind(size)
case *types.Struct:
return asmStruct
case *types.Interface:
if t.Empty() {
return asmEmptyInterface
}
return asmInterface
case *types.Array:
return asmArray
case *types.Slice:
return asmSlice
}
panic("unreachable")
}
// A component is an assembly-addressable component of a composite type,
// or a composite type itself.
type component struct {
size int
offset int
kind asmKind
typ string
suffix string // Such as _base for string base, _0_lo for lo half of first element of [1]uint64 on 32 bit machine.
outer string // The suffix for immediately containing composite type.
}
func newComponent(suffix string, kind asmKind, typ string, offset, size int, outer string) component {
return component{suffix: suffix, kind: kind, typ: typ, offset: offset, size: size, outer: outer}
}
// componentsOfType generates a list of components of type t.
// For example, given string, the components are the string itself, the base, and the length.
func componentsOfType(arch *asmArch, t types.Type) []component {
return appendComponentsRecursive(arch, t, nil, "", 0)
}
// appendComponentsRecursive implements componentsOfType.
// Recursion is required to correct handle structs and arrays,
// which can contain arbitrary other types.
func appendComponentsRecursive(arch *asmArch, t types.Type, cc []component, suffix string, off int) []component {
s := t.String()
size := int(arch.sizes.Sizeof(t))
kind := asmKindForType(t, size)
cc = append(cc, newComponent(suffix, kind, s, off, size, suffix))
switch kind {
case 8:
if arch.ptrSize == 4 {
w1, w2 := "lo", "hi"
if arch.bigEndian {
w1, w2 = w2, w1
}
cc = append(cc, newComponent(suffix+"_"+w1, 4, "half "+s, off, 4, suffix))
cc = append(cc, newComponent(suffix+"_"+w2, 4, "half "+s, off+4, 4, suffix))
}
case asmEmptyInterface:
cc = append(cc, newComponent(suffix+"_type", asmKind(arch.ptrSize), "interface type", off, arch.ptrSize, suffix))
cc = append(cc, newComponent(suffix+"_data", asmKind(arch.ptrSize), "interface data", off+arch.ptrSize, arch.ptrSize, suffix))
case asmInterface:
cc = append(cc, newComponent(suffix+"_itable", asmKind(arch.ptrSize), "interface itable", off, arch.ptrSize, suffix))
cc = append(cc, newComponent(suffix+"_data", asmKind(arch.ptrSize), "interface data", off+arch.ptrSize, arch.ptrSize, suffix))
case asmSlice:
cc = append(cc, newComponent(suffix+"_base", asmKind(arch.ptrSize), "slice base", off, arch.ptrSize, suffix))
cc = append(cc, newComponent(suffix+"_len", asmKind(arch.intSize), "slice len", off+arch.ptrSize, arch.intSize, suffix))
cc = append(cc, newComponent(suffix+"_cap", asmKind(arch.intSize), "slice cap", off+arch.ptrSize+arch.intSize, arch.intSize, suffix))
case asmString:
cc = append(cc, newComponent(suffix+"_base", asmKind(arch.ptrSize), "string base", off, arch.ptrSize, suffix))
cc = append(cc, newComponent(suffix+"_len", asmKind(arch.intSize), "string len", off+arch.ptrSize, arch.intSize, suffix))
case asmComplex:
fsize := size / 2
cc = append(cc, newComponent(suffix+"_real", asmKind(fsize), fmt.Sprintf("real(complex%d)", size*8), off, fsize, suffix))
cc = append(cc, newComponent(suffix+"_imag", asmKind(fsize), fmt.Sprintf("imag(complex%d)", size*8), off+fsize, fsize, suffix))
case asmStruct:
tu := t.Underlying().(*types.Struct)
fields := make([]*types.Var, tu.NumFields())
for i := 0; i < tu.NumFields(); i++ {
fields[i] = tu.Field(i)
}
offsets := arch.sizes.Offsetsof(fields)
for i, f := range fields {
cc = appendComponentsRecursive(arch, f.Type(), cc, suffix+"_"+f.Name(), off+int(offsets[i]))
}
case asmArray:
tu := t.Underlying().(*types.Array)
elem := tu.Elem()
// Calculate offset of each element array.
fields := []*types.Var{
types.NewVar(token.NoPos, nil, "fake0", elem),
types.NewVar(token.NoPos, nil, "fake1", elem),
}
offsets := arch.sizes.Offsetsof(fields)
elemoff := int(offsets[1])
for i := 0; i < int(tu.Len()); i++ {
cc = appendComponentsRecursive(arch, elem, cc, suffix+"_"+strconv.Itoa(i), i*elemoff)
}
}
return cc
}
// asmParseDecl parses a function decl for expected assembly variables.
func (f *File) asmParseDecl(decl *ast.FuncDecl) map[string]*asmFunc {
var (
arch *asmArch
fn *asmFunc
offset int
)
// addParams adds asmVars for each of the parameters in list.
// isret indicates whether the list are the arguments or the return values.
addParams := func(list []*ast.Field, isret bool) {
argnum := 0
for _, fld := range list {
t := f.pkg.types[fld.Type].Type
align := int(arch.sizes.Alignof(t))
size := int(arch.sizes.Sizeof(t))
offset += -offset & (align - 1)
cc := componentsOfType(arch, t)
// names is the list of names with this type.
names := fld.Names
if len(names) == 0 {
// Anonymous args will be called arg, arg1, arg2, ...
// Similarly so for return values: ret, ret1, ret2, ...
name := "arg"
if isret {
name = "ret"
}
if argnum > 0 {
name += strconv.Itoa(argnum)
}
names = []*ast.Ident{ast.NewIdent(name)}
}
argnum += len(names)
// Create variable for each name.
for _, id := range names {
name := id.Name
for _, c := range cc {
outer := name + c.outer
v := asmVar{
name: name + c.suffix,
kind: c.kind,
typ: c.typ,
off: offset + c.offset,
size: c.size,
}
if vo := fn.vars[outer]; vo != nil {
vo.inner = append(vo.inner, &v)
}
fn.vars[v.name] = &v
for i := 0; i < v.size; i++ {
fn.varByOffset[v.off+i] = &v
}
}
offset += size
}
}
}
m := make(map[string]*asmFunc)
for _, arch = range arches {
fn = &asmFunc{
arch: arch,
vars: make(map[string]*asmVar),
varByOffset: make(map[int]*asmVar),
}
offset = 0
addParams(decl.Type.Params.List, false)
if decl.Type.Results != nil && len(decl.Type.Results.List) > 0 {
offset += -offset & (arch.maxAlign - 1)
addParams(decl.Type.Results.List, true)
}
fn.size = offset
m[arch.name] = fn
}
return m
}
// asmCheckVar checks a single variable reference.
func asmCheckVar(badf func(string, ...interface{}), fn *asmFunc, line, expr string, off int, v *asmVar) {
m := asmOpcode.FindStringSubmatch(line)
if m == nil {
if !strings.HasPrefix(strings.TrimSpace(line), "//") {
badf("cannot find assembly opcode")
}
return
}
// Determine operand sizes from instruction.
// Typically the suffix suffices, but there are exceptions.
var src, dst, kind asmKind
op := m[1]
switch fn.arch.name + "." + op {
case "386.FMOVLP":
src, dst = 8, 4
case "arm.MOVD":
src = 8
case "arm.MOVW":
src = 4
case "arm.MOVH", "arm.MOVHU":
src = 2
case "arm.MOVB", "arm.MOVBU":
src = 1
// LEA* opcodes don't really read the second arg.
// They just take the address of it.
case "386.LEAL":
dst = 4
case "amd64.LEAQ":
dst = 8
case "amd64p32.LEAL":
dst = 4
default:
switch fn.arch.name {
case "386", "amd64":
if strings.HasPrefix(op, "F") && (strings.HasSuffix(op, "D") || strings.HasSuffix(op, "DP")) {
// FMOVDP, FXCHD, etc
src = 8
break
}
if strings.HasPrefix(op, "P") && strings.HasSuffix(op, "RD") {
// PINSRD, PEXTRD, etc
src = 4
break
}
if strings.HasPrefix(op, "F") && (strings.HasSuffix(op, "F") || strings.HasSuffix(op, "FP")) {
// FMOVFP, FXCHF, etc
src = 4
break
}
if strings.HasSuffix(op, "SD") {
// MOVSD, SQRTSD, etc
src = 8
break
}
if strings.HasSuffix(op, "SS") {
// MOVSS, SQRTSS, etc
src = 4
break
}
if strings.HasPrefix(op, "SET") {
// SETEQ, etc
src = 1
break
}
switch op[len(op)-1] {
case 'B':
src = 1
case 'W':
src = 2
case 'L':
src = 4
case 'D', 'Q':
src = 8
}
case "ppc64", "ppc64le":
// Strip standard suffixes to reveal size letter.
m := ppc64Suff.FindStringSubmatch(op)
if m != nil {
switch m[1][0] {
case 'B':
src = 1
case 'H':
src = 2
case 'W':
src = 4
case 'D':
src = 8
}
}
case "mips", "mipsle", "mips64", "mips64le":
switch op {
case "MOVB", "MOVBU":
src = 1
case "MOVH", "MOVHU":
src = 2
case "MOVW", "MOVWU", "MOVF":
src = 4
case "MOVV", "MOVD":
src = 8
}
case "s390x":
switch op {
case "MOVB", "MOVBZ":
src = 1
case "MOVH", "MOVHZ":
src = 2
case "MOVW", "MOVWZ", "FMOVS":
src = 4
case "MOVD", "FMOVD":
src = 8
}
}
}
if dst == 0 {
dst = src
}
// Determine whether the match we're holding
// is the first or second argument.
if strings.Index(line, expr) > strings.Index(line, ",") {
kind = dst
} else {
kind = src
}
vk := v.kind
vs := v.size
vt := v.typ
switch vk {
case asmInterface, asmEmptyInterface, asmString, asmSlice:
// allow reference to first word (pointer)
vk = v.inner[0].kind
vs = v.inner[0].size
vt = v.inner[0].typ
}
if off != v.off {
var inner bytes.Buffer
for i, vi := range v.inner {
if len(v.inner) > 1 {
fmt.Fprintf(&inner, ",")
}
fmt.Fprintf(&inner, " ")
if i == len(v.inner)-1 {
fmt.Fprintf(&inner, "or ")
}
fmt.Fprintf(&inner, "%s+%d(FP)", vi.name, vi.off)
}
badf("invalid offset %s; expected %s+%d(FP)%s", expr, v.name, v.off, inner.String())
return
}
if kind != 0 && kind != vk {
var inner bytes.Buffer
if len(v.inner) > 0 {
fmt.Fprintf(&inner, " containing")
for i, vi := range v.inner {
if i > 0 && len(v.inner) > 2 {
fmt.Fprintf(&inner, ",")
}
fmt.Fprintf(&inner, " ")
if i > 0 && i == len(v.inner)-1 {
fmt.Fprintf(&inner, "and ")
}
fmt.Fprintf(&inner, "%s+%d(FP)", vi.name, vi.off)
}
}
badf("invalid %s of %s; %s is %d-byte value%s", op, expr, vt, vs, inner.String())
}
}