// Copyright 2017 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "src/utf8_fix.h" #include <algorithm> #include <cassert> namespace protobuf_mutator { namespace { void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) { while (--size) { *(--e) = 0x80 | (code & 0x3F); code >>= 6; } *(--e) = prefix | code; } char* FixCode(char* b, const char* e, RandomEngine* random) { const char* start = b; assert(b < e); e = std::min<const char*>(e, b + 4); char32_t c = *b++; for (; b < e && (*b & 0xC0) == 0x80; ++b) { c = (c << 6) + (*b & 0x3F); } uint8_t size = b - start; switch (size) { case 1: c &= 0x7F; StoreCode(b, c, size, 0); break; case 2: c &= 0x7FF; if (c < 0x80) { c = std::uniform_int_distribution<char32_t>(0x80, 0x7FF)(*random); } StoreCode(b, c, size, 0xC0); break; case 3: c &= 0xFFFF; // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves. if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) { uint32_t halves = 0xE000 - 0xD800; c = std::uniform_int_distribution<char32_t>(0x800, 0xFFFF - halves)(*random); if (c >= 0xD800) c += halves; } StoreCode(b, c, size, 0xE0); break; case 4: c &= 0x1FFFFF; if (c < 0x10000 || c > 0x10FFFF) { c = std::uniform_int_distribution<char32_t>(0x10000, 0x10FFFF)(*random); } StoreCode(b, c, size, 0xF0); break; default: assert(false && "Unexpected size of UTF-8 sequence"); } return b; } } // namespace void FixUtf8String(std::string* str, RandomEngine* random) { if (str->empty()) return; char* b = &(*str)[0]; const char* e = b + str->size(); while (b < e) { b = FixCode(b, e, random); } } } // namespace protobuf_mutator