// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "fmt" "strconv" "strings" "time" ) // hasNUL reports whether the NUL character exists within s. func hasNUL(s string) bool { return strings.IndexByte(s, 0) >= 0 } // isASCII reports whether the input is an ASCII C-style string. func isASCII(s string) bool { for _, c := range s { if c >= 0x80 || c == 0x00 { return false } } return true } // toASCII converts the input to an ASCII C-style string. // This a best effort conversion, so invalid characters are dropped. func toASCII(s string) string { if isASCII(s) { return s } b := make([]byte, 0, len(s)) for _, c := range s { if c < 0x80 && c != 0x00 { b = append(b, byte(c)) } } return string(b) } type parser struct { err error // Last error seen } type formatter struct { err error // Last error seen } // parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. func (*parser) parseString(b []byte) string { if i := bytes.IndexByte(b, 0); i >= 0 { return string(b[:i]) } return string(b) } // formatString copies s into b, NUL-terminating if possible. func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { f.err = ErrFieldTooLong } copy(b, s) if len(s) < len(b) { b[len(s)] = 0 } // Some buggy readers treat regular files with a trailing slash // in the V7 path field as a directory even though the full path // recorded elsewhere (e.g., via PAX record) contains no trailing slash. if len(s) > len(b) && b[len(b)-1] == '/' { n := len(strings.TrimRight(s[:len(b)], "/")) b[n] = 0 // Replace trailing slash with NUL terminator } } // fitsInBase256 reports whether x can be encoded into n bytes using base-256 // encoding. Unlike octal encoding, base-256 encoding does not require that the // string ends with a NUL character. Thus, all n bytes are available for output. // // If operating in binary mode, this assumes strict GNU binary mode; which means // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is // equivalent to the sign bit in two's complement form. func fitsInBase256(n int, x int64) bool { binBits := uint(n-1) * 8 return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) } // parseNumeric parses the input as being encoded in either base-256 or octal. // This function may return negative numbers. // If parsing fails or an integer overflow occurs, err will be set. func (p *parser) parseNumeric(b []byte) int64 { // Check for base-256 (binary) format first. // If the first bit is set, then all following bits constitute a two's // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { // Handling negative numbers relies on the following identity: // -a-1 == ^a // // If the number is negative, we use an inversion mask to invert the // data bytes and treat the value as an unsigned number. var inv byte // 0x00 if positive or zero, 0xff if negative if b[0]&0x40 != 0 { inv = 0xff } var x uint64 for i, c := range b { c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing if i == 0 { c &= 0x7f // Ignore signal bit in first byte } if (x >> 56) > 0 { p.err = ErrHeader // Integer overflow return 0 } x = x<<8 | uint64(c) } if (x >> 63) > 0 { p.err = ErrHeader // Integer overflow return 0 } if inv == 0xff { return ^int64(x) } return int64(x) } // Normal case is base-8 (octal) format. return p.parseOctal(b) } // formatNumeric encodes x into b using base-8 (octal) encoding if possible. // Otherwise it will attempt to use base-256 (binary) encoding. func (f *formatter) formatNumeric(b []byte, x int64) { if fitsInOctal(len(b), x) { f.formatOctal(b, x) return } if fitsInBase256(len(b), x) { for i := len(b) - 1; i >= 0; i-- { b[i] = byte(x) x >>= 8 } b[0] |= 0x80 // Highest bit indicates binary format return } f.formatOctal(b, 0) // Last resort, just write zero f.err = ErrFieldTooLong } func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. // So we remove leading and trailing NULs and spaces to // be sure. b = bytes.Trim(b, " \x00") if len(b) == 0 { return 0 } x, perr := strconv.ParseUint(p.parseString(b), 8, 64) if perr != nil { p.err = ErrHeader } return int64(x) } func (f *formatter) formatOctal(b []byte, x int64) { if !fitsInOctal(len(b), x) { x = 0 // Last resort, just write zero f.err = ErrFieldTooLong } s := strconv.FormatInt(x, 8) // Add leading zeros, but leave room for a NUL. if n := len(b) - len(s) - 1; n > 0 { s = strings.Repeat("0", n) + s } f.formatString(b, s) } // fitsInOctal reports whether the integer x fits in a field n-bytes long // using octal encoding with the appropriate NUL terminator. func fitsInOctal(n int, x int64) bool { octBits := uint(n-1) * 3 return x >= 0 && (n >= 22 || x < 1<<octBits) } // parsePAXTime takes a string of the form %d.%d as described in the PAX // specification. Note that this implementation allows for negative timestamps, // which is allowed for by the PAX specification, but not always portable. func parsePAXTime(s string) (time.Time, error) { const maxNanoSecondDigits = 9 // Split string into seconds and sub-seconds parts. ss, sn := s, "" if pos := strings.IndexByte(s, '.'); pos >= 0 { ss, sn = s[:pos], s[pos+1:] } // Parse the seconds. secs, err := strconv.ParseInt(ss, 10, 64) if err != nil { return time.Time{}, ErrHeader } if len(sn) == 0 { return time.Unix(secs, 0), nil // No sub-second values } // Parse the nanoseconds. if strings.Trim(sn, "0123456789") != "" { return time.Time{}, ErrHeader } if len(sn) < maxNanoSecondDigits { sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad } else { sn = sn[:maxNanoSecondDigits] // Right truncate } nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed if len(ss) > 0 && ss[0] == '-' { return time.Unix(secs, -1*nsecs), nil // Negative correction } return time.Unix(secs, nsecs), nil } // formatPAXTime converts ts into a time of the form %d.%d as described in the // PAX specification. This function is capable of negative timestamps. func formatPAXTime(ts time.Time) (s string) { secs, nsecs := ts.Unix(), ts.Nanosecond() if nsecs == 0 { return strconv.FormatInt(secs, 10) } // If seconds is negative, then perform correction. sign := "" if secs < 0 { sign = "-" // Remember sign secs = -(secs + 1) // Add a second to secs nsecs = -(nsecs - 1E9) // Take that second away from nsecs } return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") } // parsePAXRecord parses the input PAX record string into a key-value pair. // If parsing is successful, it will slice off the currently read record and // return the remainder as r. func parsePAXRecord(s string) (k, v, r string, err error) { // The size field ends at the first space. sp := strings.IndexByte(s, ' ') if sp == -1 { return "", "", s, ErrHeader } // Parse the first token as a decimal integer. n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int if perr != nil || n < 5 || int64(len(s)) < n { return "", "", s, ErrHeader } // Extract everything between the space and the final newline. rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] if nl != "\n" { return "", "", s, ErrHeader } // The first equals separates the key from the value. eq := strings.IndexByte(rec, '=') if eq == -1 { return "", "", s, ErrHeader } k, v = rec[:eq], rec[eq+1:] if !validPAXRecord(k, v) { return "", "", s, ErrHeader } return k, v, rem, nil } // formatPAXRecord formats a single PAX record, prefixing it with the // appropriate length. func formatPAXRecord(k, v string) (string, error) { if !validPAXRecord(k, v) { return "", ErrHeader } const padding = 3 // Extra padding for ' ', '=', and '\n' size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) record := strconv.Itoa(size) + " " + k + "=" + v + "\n" // Final adjustment if adding size field increased the record size. if len(record) != size { size = len(record) record = strconv.Itoa(size) + " " + k + "=" + v + "\n" } return record, nil } // validPAXRecord reports whether the key-value pair is valid where each // record is formatted as: // "%d %s=%s\n" % (size, key, value) // // Keys and values should be UTF-8, but the number of bad writers out there // forces us to be a more liberal. // Thus, we only reject all keys with NUL, and only reject NULs in values // for the PAX version of the USTAR string fields. // The key must not contain an '=' character. func validPAXRecord(k, v string) bool { if k == "" || strings.IndexByte(k, '=') >= 0 { return false } switch k { case paxPath, paxLinkpath, paxUname, paxGname: return !hasNUL(v) default: return !hasNUL(k) } }