// Go program | 174 lines | 3.67 KB

// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// A replacement for fileslist.py written in Go that takes advantage of multiple cores.

package main

import (
	"crypto/sha256"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
)

// MAX_DEFAULT_PARA caps the default value of the -para flag, so that machines
// with many cores do not start more than this many scanner goroutines unless
// the user explicitly asks for more.
const MAX_DEFAULT_PARA = 24

// defaultPara returns the default number of scanner goroutines: the number of
// logical CPUs reported by the runtime, limited to MAX_DEFAULT_PARA.
func defaultPara() int {
	if cpus := runtime.NumCPU(); cpus <= MAX_DEFAULT_PARA {
		return cpus
	}
	return MAX_DEFAULT_PARA
}

// para is the -para command line flag: the number of goroutines that scan
// files concurrently. Its default comes from defaultPara().
var para = flag.Int("para", defaultPara(), "Number of goroutines")

// Node describes a single scanned file. Only the exported fields are emitted
// in the JSON output; the unexported ones are working state for the scan.
type Node struct {
	// Reported fields.
	SHA256 string // hex encoded SHA-256 digest, filled in by scan
	Name   string // path of the file as seen on the device
	Size   int64  // size taken from stat, filled in by scan

	// Working state, never serialized.
	path string      // path of the file on the host
	stat os.FileInfo // file info the size and file mode are read from
}

// newNode builds a Node for the file found at hostPath that will be reported
// under devicePath. SHA256 and Size keep their zero values until scan runs.
func newNode(hostPath string, devicePath string, stat os.FileInfo) Node {
	var n Node
	n.Name = devicePath
	n.path = hostPath
	n.stat = stat
	return n
}

// scan fills in Size and SHA256 for the node and reports whether the node
// should be added to the result, which is currently always the case.
//
// For a symlink the digest is computed over the link target string rather
// than over the file the link points to. Any I/O error results in a panic.
func (n *Node) scan() bool {
	n.Size = n.stat.Size()

	hasher := sha256.New()
	if isLink := n.stat.Mode()&os.ModeSymlink != 0; isLink {
		// Hash where the link points to, without following it.
		target, err := os.Readlink(n.path)
		if err != nil {
			panic(err)
		}
		if _, err = io.WriteString(hasher, target); err != nil {
			panic(err)
		}
	} else {
		// Stream the file content through the hasher.
		file, err := os.Open(n.path)
		if err != nil {
			panic(err)
		}
		defer file.Close()

		if _, err = io.Copy(hasher, file); err != nil {
			panic(err)
		}
	}

	n.SHA256 = fmt.Sprintf("%x", hasher.Sum(nil))
	return true
}

func main() {
	flag.Parse()

	allOutput := make([]Node, 0, 1024) // Store all outputs.
	mutex := &sync.Mutex{}             // Guard allOutput

	ch := make(chan Node) // Pass nodes to goroutines.

	var wg sync.WaitGroup // To wait for all goroutines.
	wg.Add(*para)

	// Scan files in multiple goroutines.
	for i := 0; i < *para; i++ {
		go func() {
			defer wg.Done()

			output := make([]Node, 0, 1024) // Local output list.
			for node := range ch {
				if node.scan() {
					output = append(output, node)
				}
			}
			// Add to the global output list.
			mutex.Lock()
			allOutput = append(allOutput, output...)
			mutex.Unlock()
		}()
	}

	// Walk the directories and find files to scan.
	for _, dir := range flag.Args() {
		absDir, err := filepath.Abs(dir)
		if err != nil {
			panic(err)
		}
		deviceRoot := filepath.Clean(absDir + "/..")
		err = filepath.Walk(dir, func(path string, stat os.FileInfo, err error) error {
			if err != nil {
				panic(err)
			}
			if stat.IsDir() {
				return nil
			}
			absPath, err := filepath.Abs(path)
			if err != nil {
				panic(err)
			}
			devicePath, err := filepath.Rel(deviceRoot, absPath)
			if err != nil {
				panic(err)
			}
			devicePath = "/" + devicePath
			ch <- newNode(absPath, devicePath, stat)
			return nil
		})
		if err != nil {
			panic(err)
		}
	}

	// Wait until all the goroutines finish.
	close(ch)
	wg.Wait()

	// Sort the entries and dump as json.
	sort.Slice(allOutput, func(i, j int) bool {
		if allOutput[i].Size > allOutput[j].Size {
			return true
		}
		if allOutput[i].Size == allOutput[j].Size && strings.Compare(allOutput[i].Name, allOutput[j].Name) > 0 {
			return true
		}
		return false
	})

	j, err := json.MarshalIndent(allOutput, "", "  ")
	if err != nil {
		panic(nil)
	}

	fmt.Printf("%s\n", j)
}