/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Oracle nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This source code is provided to illustrate the usage of a given feature
* or technique and has been deliberately simplified. Additional steps
* required for a production-quality application, such as security checks,
* input validation, and proper error handling, might not be present in
* this sample code.
*/
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.function.Consumer;
import java.util.regex.Pattern;
/**
* WC - Prints newline, word, and character counts, plus the maximum line
* length, for a single file. See the {@link #usage} method for instructions
* and command line parameters. This
* sample shows usages of:
* <ul>
* <li>Lambda and bulk operations. Shows how to create a custom collector to
* gather custom statistics. Implements the collection of statistics using a
* built-in API.</li>
* <li>Constructor reference.</li>
* <li>Try-with-resources feature.</li>
* </ul>
*
*/
public class WC {

    /**
     * Read-ahead limit, in characters, handed to {@link BufferedReader#mark}.
     * The input can only be rewound with {@code reset()} while fewer than this
     * many characters have been read since the mark was set.
     */
    private static final int READ_AHEAD_LIMIT = 100_000_000;

    /** Matches a single non-word character; used as the word delimiter. */
    private static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W");

    /**
     * The main method for the WC program. Run the program with an empty
     * argument list to see possible arguments.
     *
     * <p>The file is opened once, marked at its start, and then analysed
     * twice: first with one stream pipeline per statistic, then with a single
     * pipeline that gathers every statistic at once.
     *
     * @param args the argument list for WC; exactly one file name is expected
     * @throws java.io.IOException If an input exception occurred.
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            usage();
            return;
        }
        try (BufferedReader reader = new BufferedReader(
                new FileReader(args[0]))) {
            // Remember the start of the input so that each pass can rewind.
            reader.mark(READ_AHEAD_LIMIT);
            /*
             * Statistics can be gathered in four passes using a built-in API.
             * The method demonstrates how separate operations can be
             * implemented using a built-in API.
             */
            collectInFourPasses(reader);
            /*
             * Using several passes to collect data is not the best way.
             * Statistics can be gathered by a custom collector in one pass.
             */
            reader.reset();
            collectInOnePass(reader);
        } catch (FileNotFoundException e) {
            usage();
            System.err.println(e);
        }
    }

    /**
     * Prints the character, word, and line counts and the maximum line length,
     * computing each one with its own pass over the input.
     *
     * @param reader a reader that has been marked at the position every pass
     *               should start from; it is reset to that mark between passes
     * @throws IOException if the reader cannot be reset to its mark
     */
    private static void collectInFourPasses(BufferedReader reader)
            throws IOException {
        /*
         * Input is read as a stream of lines by lines().
         * Every line is turned into a stream of chars by the flatMapToInt(...)
         * method.
         * Length of the stream is counted by count().
         */
        System.out.println("Character count = "
                + reader.lines().flatMapToInt(String::chars).count());
        /*
         * Input is read as a stream of lines by lines().
         * Every line is split by NON_WORD_PATTERN into words by the
         * flatMap(...) method.
         * Empty tokens are removed by the filter(...) method.
         * Length of the stream is counted by count().
         */
        reader.reset();
        System.out.println("Word count = "
                + reader.lines()
                        .flatMap(NON_WORD_PATTERN::splitAsStream)
                        .filter(str -> !str.isEmpty()).count());
        /*
         * Input is read as a stream of lines by lines().
         * Length of the stream is counted by count().
         */
        reader.reset();
        System.out.println("Newline count = " + reader.lines().count());
        /*
         * Input is read as a stream of lines by lines().
         * Every line is mapped to its length.
         * Maximum of the lengths is calculated. An input without any lines
         * has no maximum, so 0 is reported in that case; this matches what
         * the one-pass WCStatistics prints for the same input.
         */
        reader.reset();
        System.out.println("Max line length = "
                + reader.lines().mapToInt(String::length).max().orElse(0));
    }

    /**
     * Prints the same statistics as {@link #collectInFourPasses}, gathered in
     * a single pass by a mutable reduction into {@code WCStatistics}.
     *
     * @param reader the reader to consume, positioned where counting should
     *               start
     */
    private static void collectInOnePass(BufferedReader reader) {
        /*
         * The collect() method has three parameters:
         * The first parameter is the {@code WCStatistics} constructor
         * reference. collect() will create {@code WCStatistics} instances,
         * where statistics will be aggregated.
         * The second parameter shows how {@code WCStatistics} will process
         * a String.
         * The third parameter shows how to merge two {@code WCStatistics}
         * instances.
         *
         * A {@code Collector} could also be used, which would be a more
         * reusable solution. See the {@code CSVProcessor} example for how a
         * {@code Collector} can be implemented.
         *
         * Note that any performance increase when going parallel will
         * depend on the size of the input (lines) and the cost per element.
         */
        WCStatistics wc = reader.lines().parallel()
                .collect(WCStatistics::new,
                        WCStatistics::accept,
                        WCStatistics::combine);
        System.out.println(wc);
    }

    /** Prints a short description of the command line to standard output. */
    private static void usage() {
        System.out.println("Usage: " + WC.class.getSimpleName() + " FILE");
        System.out.println("Print newline, word,"
                + " character counts and max line length for FILE.");
    }

    /**
     * Mutable accumulator for the four statistics. A fresh instance represents
     * empty input: every counter, including the maximum line length, is 0.
     */
    private static class WCStatistics implements Consumer<String> {
        /*
         * @implNote This implementation does not need to be thread safe because
         * the parallel implementation of
         * {@link java.util.stream.Stream#collect Stream.collect()}
         * provides the necessary partitioning and isolation for safe parallel
         * execution.
         */
        private long characterCount;
        private long lineCount;
        private long wordCount;
        private long maxLineLength;

        /**
         * Folds one line into the statistics.
         *
         * @param line a single line of input
         */
        @Override
        public void accept(String line) {
            characterCount += line.length();
            lineCount++;
            wordCount += NON_WORD_PATTERN.splitAsStream(line)
                    .filter(str -> !str.isEmpty()).count();
            maxLineLength = Math.max(maxLineLength, line.length());
        }

        /**
         * Merges another partial result into this one: the counts are summed
         * and the larger maximum line length is kept.
         *
         * @param stat the statistics to merge into this instance
         */
        public void combine(WCStatistics stat) {
            wordCount += stat.wordCount;
            lineCount += stat.lineCount;
            characterCount += stat.characterCount;
            maxLineLength = Math.max(maxLineLength, stat.maxLineLength);
        }

        /** Formats the statistics as a header line plus one line per value. */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("#------WCStatistic------#\n");
            sb.append("Character count = ").append(characterCount).append('\n');
            sb.append("Word count = ").append(wordCount).append('\n');
            sb.append("Newline count = ").append(lineCount).append('\n');
            sb.append("Max line length = ").append(maxLineLength).append('\n');
            return sb.toString();
        }
    }
}