// Plain text  |  91 lines  |  2.37 KB

// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/utf8_fix.h"

#include <algorithm>
#include <cassert>

namespace protobuf_mutator {

namespace {

// Encodes |code| as a |size|-byte sequence occupying the |size| bytes that end
// immediately before |e|. The bytes are written back to front: first the
// |size| - 1 trailing bytes, each carrying the next 6 low bits of |code| under
// the 0x80 marker, and finally the lead byte, which combines |prefix| with
// whatever bits of |code| remain.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  char* out = e;
  for (uint8_t trailing = static_cast<uint8_t>(size - 1); trailing != 0;
       --trailing) {
    --out;
    *out = 0x80 | (code & 0x3F);
    code >>= 6;
  }
  --out;
  *out = prefix | code;
}

// Rewrites, in place, the byte sequence starting at |b| so that it becomes one
// valid UTF-8 encoded code point, and returns a pointer just past it.
//
// The sequence is the byte at |b| plus the continuation bytes (10xxxxxx) that
// directly follow it, limited to 4 bytes in total and never reaching |e|. Its
// length is preserved: the payload bits are re-encoded with the lead-byte
// prefix that matches the length. If those bits do not form a code point that
// is valid for that length (too small for the length, a UTF-16 surrogate half,
// or above 0x10FFFF), a random valid code point drawn from |random| is stored
// instead.
//
// Requires b < e.
char* FixCode(char* b, const char* e, RandomEngine* random) {
  const char* start = b;
  assert(b < e);

  // A UTF-8 sequence is at most 4 bytes long. Clamp by distance so that no
  // pointer beyond the end of the buffer is ever formed.
  if (e - b > 4) e = b + 4;

  // Read the lead byte as unsigned so it is not sign-extended where plain char
  // is signed.
  char32_t c = static_cast<unsigned char>(*b++);
  for (; b < e && (*b & 0xC0) == 0x80; ++b) {
    c = (c << 6) + (*b & 0x3F);
  }
  const uint8_t size = static_cast<uint8_t>(b - start);

  // The distributions below use uint32_t rather than char32_t:
  // std::uniform_int_distribution is only defined for the standard short, int,
  // long and long long types (signed or unsigned), and some standard libraries
  // reject char32_t outright.
  switch (size) {
    case 1:
      // Single byte: keep the low 7 bits, which is always valid ASCII.
      c &= 0x7F;
      StoreCode(b, c, size, 0);
      break;
    case 2:
      c &= 0x7FF;
      // Values below 0x80 fit in one byte, so a 2-byte form would be overlong.
      if (c < 0x80) {
        c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random);
      }
      StoreCode(b, c, size, 0xC0);
      break;
    case 3:
      c &= 0xFFFF;

      // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
      if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
        const uint32_t halves = 0xE000 - 0xD800;
        // Draw from a range shortened by the surrogate gap, then shift values
        // at or above the gap past it so every valid code point is reachable.
        c = std::uniform_int_distribution<uint32_t>(0x800,
                                                    0xFFFF - halves)(*random);
        if (c >= 0xD800) c += halves;
      }
      StoreCode(b, c, size, 0xE0);
      break;
    case 4:
      c &= 0x1FFFFF;
      // 4-byte sequences must encode a value in [0x10000, 0x10FFFF].
      if (c < 0x10000 || c > 0x10FFFF) {
        c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random);
      }
      StoreCode(b, c, size, 0xF0);
      break;
    default:
      assert(false && "Unexpected size of UTF-8 sequence");
  }
  return b;
}

}  // namespace

// Walks |str| from the first byte to the last, handing each position to
// FixCode and resuming from the pointer it returns, so the whole buffer is
// processed in place one sequence at a time. An empty string is left
// untouched. |random| is forwarded to FixCode unchanged.
void FixUtf8String(std::string* str, RandomEngine* random) {
  if (str->empty()) return;

  char* const first = &str->front();
  const char* const limit = first + str->size();
  for (char* cursor = first; cursor < limit;) {
    cursor = FixCode(cursor, limit, random);
  }
}

}  // namespace protobuf_mutator