/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package lexer

import java.io.File

/**
 * Contract for a lexer that converts text into a list of [Token]s.
 *
 * The companion object hosts string helpers that pad delimiters with spaces
 * (and reverse that padding).
 */
interface ILexer {

    /**
     * Tokenizes the given string.
     *
     * @param str the text to tokenize
     * @return the tokens produced by the implementation
     */
    fun tokenize(str: String): List<Token>

    /**
     * Tokenizes the contents of [file].
     *
     * The whole file is read into memory with [File.readText] and handed to
     * the string overload.
     *
     * @param file the file whose text is tokenized
     * @return the tokens produced for the file's text
     */
    fun tokenize(file: File): List<Token> = tokenize(file.readText())

    companion object {
        // Patterns are compiled once here instead of once per processed line/call.
        private val PADDED_PKG_SCOPE = Regex(":\\s+:")
        private val PADDED_LSHIFT = Regex("<\\s+<")
        private val PADDED_RSHIFT = Regex(">\\s+>")
        private val PADDED_LEQ = Regex("<\\s+=")
        private val PADDED_GEQ = Regex(">\\s+=")
        private val PADDED_DECIMAL = Regex("(\\d+)\\s*\\.\\s*(\\d+)")
        private val TRAILING_PERIOD = Regex("\\s+\\.\\s*$")

        /**
         * Surrounds every delimiter in [str] with a space on each side, line by line.
         *
         * The delimiters are the values of all [TokenGrammar] entries whose category is
         * [TokenCategory.Delimiter], except [TokenGrammar.COMMENT_START]. After padding,
         * sequences that the padding split apart are joined again: decimal numbers, and
         * the character pairs that are replaced by [TokenGrammar.PKG_SCOPE],
         * [TokenGrammar.LSHIFT], [TokenGrammar.RSHIFT], [TokenGrammar.LEQ] and
         * [TokenGrammar.GEQ].
         *
         * @param str the text to pad; may contain several lines
         * @return the padded text, where every line (including the last one) is
         *         terminated by the platform line separator
         */
        fun padDelimiters(str: String): String {
            val delimiters = TokenGrammar.values()
                .filter { it.category == TokenCategory.Delimiter }
                .filter { it != TokenGrammar.COMMENT_START } //don't convert '/**' to '/* *'
                .map { it.value } //return string representation

            // Same platform separator the previously used (now deprecated) appendln emitted.
            val lineSeparator = System.lineSeparator()

            return buildString {
                str.lineSequence().forEach { line ->
                    val padded = delimiters.fold(line) { acc, token ->
                        acc.replace(token, " $token ")
                    }

                    //delimiter corrections
                    val corrected = unpadDecimal(padded) //'nn . nn' => 'n.n'
                        .replace(PADDED_PKG_SCOPE, TokenGrammar.PKG_SCOPE.value) //': :' => '::'
                        //squeeze multi-char ops with chevrons
                        .replace(PADDED_LSHIFT, TokenGrammar.LSHIFT.value)
                        .replace(PADDED_RSHIFT, TokenGrammar.RSHIFT.value)
                        .replace(PADDED_LEQ, TokenGrammar.LEQ.value)
                        .replace(PADDED_GEQ, TokenGrammar.GEQ.value)

                    append(corrected).append(lineSeparator)
                }
            }
        }

        /**
         * Replaces 'nn . nn' with 'nn.nn'.
         *
         * Works in a single left-to-right pass, so each replacement only touches the
         * text it matched. Matches do not overlap: in a chain such as '1 . 2 . 3' only
         * the first pair is joined ('1.2 . 3').
         *
         * Doesn't take into account decimals that are missing a prefix or suffix,
         * e.g. '9.' or '.9'.
         *
         * @param str the text in which padded decimals are collapsed
         * @return [str] with the whitespace around such decimal points removed
         */
        private fun unpadDecimal(str: String): String =
            PADDED_DECIMAL.replace(str) { match ->
                "${match.groupValues[1]}.${match.groupValues[2]}"
            }

        /**
         * Cleans up a padded and tokenized doc block (reverses [padDelimiters]).
         *
         * Every occurrence of a delimiter surrounded by single spaces is replaced by
         * the bare delimiter. Unlike [padDelimiters], the delimiter list used here
         * also contains [TokenGrammar.COMMENT_START]. Afterwards a trailing period
         * is pulled back to the preceding text and a space is inserted after every comma.
         *
         * @param str the padded text
         * @return the text with the delimiter padding removed
         */
        fun unpadDelimiters(str: String): String {
            val delimiters = TokenGrammar.values()
                .filter { it.category == TokenCategory.Delimiter }
                .map { it.value } //return string representation

            val unpadded = delimiters.fold(str) { acc, token ->
                acc.replace(" $token ", token)
            }

            //special cases
            return unpadded
                // End-of-line sentence periods. NOTE(review): the pattern is not MULTILINE,
                // so '$' only anchors at the end of the whole input - confirm that callers
                // pass one line at a time if every line is meant to be handled.
                .replace(TRAILING_PERIOD, ".")
                .replace(",", ", ") //give comma some breathing room
        }
    }
}