/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <ctype.h> #include <set> #include <stack> #include <string> #include "src/perfetto_cmd/pbtxt_to_pb.h" #include "google/protobuf/io/zero_copy_stream_impl_lite.h" #include "perfetto/base/file_utils.h" #include "perfetto/base/logging.h" #include "perfetto/base/string_view.h" #include "perfetto/base/utils.h" #include "perfetto/common/descriptor.pb.h" #include "perfetto/protozero/message.h" #include "perfetto/protozero/message_handle.h" #include "perfetto/protozero/scattered_heap_buffer.h" #include "src/perfetto_cmd/perfetto_config.descriptor.h" namespace perfetto { constexpr char kConfigProtoName[] = ".perfetto.protos.TraceConfig"; using protos::DescriptorProto; using protos::EnumDescriptorProto; using protos::EnumValueDescriptorProto; using protos::FieldDescriptorProto; using protos::FileDescriptorSet; using ::google::protobuf::io::ZeroCopyInputStream; using ::google::protobuf::io::ArrayInputStream; namespace { constexpr bool IsIdentifierStart(char c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_'; } constexpr bool IsIdentifierBody(char c) { return IsIdentifierStart(c) || isdigit(c); } const char* FieldToTypeName(const FieldDescriptorProto* field) { switch (field->type()) { case FieldDescriptorProto::TYPE_UINT64: return "uint64"; case FieldDescriptorProto::TYPE_UINT32: return "uint32"; case FieldDescriptorProto::TYPE_INT64: return "int64"; case FieldDescriptorProto::TYPE_SINT64: return "sint64"; case FieldDescriptorProto::TYPE_INT32: return "int32"; case FieldDescriptorProto::TYPE_SINT32: return "sint32"; case FieldDescriptorProto::TYPE_FIXED64: return "fixed64"; case FieldDescriptorProto::TYPE_SFIXED64: return "sfixed64"; case FieldDescriptorProto::TYPE_FIXED32: return "fixed32"; case FieldDescriptorProto::TYPE_SFIXED32: return "sfixed32"; case FieldDescriptorProto::TYPE_DOUBLE: return "double"; case FieldDescriptorProto::TYPE_FLOAT: return "float"; case FieldDescriptorProto::TYPE_BOOL: return "bool"; case FieldDescriptorProto::TYPE_STRING: return "string"; case FieldDescriptorProto::TYPE_BYTES: return "bytes"; case FieldDescriptorProto::TYPE_GROUP: return "group"; case FieldDescriptorProto::TYPE_MESSAGE: return "message"; case FieldDescriptorProto::TYPE_ENUM: return "enum"; } // For gcc PERFETTO_FATAL("Non complete switch"); } std::string Format(const char* fmt, std::map<std::string, std::string> args) { std::string result(fmt); for (const auto& key_value : args) { size_t start = result.find(key_value.first); PERFETTO_CHECK(start != std::string::npos); result.replace(start, key_value.first.size(), key_value.second); PERFETTO_CHECK(result.find(key_value.first) == std::string::npos); } return result; } enum ParseState { kWaitingForKey, kReadingKey, kWaitingForValue, kReadingStringValue, kReadingStringEscape, kReadingNumericValue, kReadingIdentifierValue, }; struct Token { size_t offset; size_t column; size_t row; base::StringView txt; size_t size() const { return txt.size(); } std::string ToStdString() const { return txt.ToStdString(); } }; struct ParserDelegateContext { const DescriptorProto* descriptor; protozero::Message* message; std::set<std::string> seen_fields; }; class ParserDelegate { public: ParserDelegate( const DescriptorProto* descriptor, protozero::Message* message, ErrorReporter* reporter, std::map<std::string, const DescriptorProto*> name_to_descriptor, std::map<std::string, const EnumDescriptorProto*> name_to_enum) : reporter_(reporter), name_to_descriptor_(std::move(name_to_descriptor)), name_to_enum_(std::move(name_to_enum)) { ctx_.push(ParserDelegateContext{descriptor, message, {}}); } void NumericField(Token key, Token value) { const FieldDescriptorProto* field = FindFieldByName( key, value, { FieldDescriptorProto::TYPE_UINT64, FieldDescriptorProto::TYPE_UINT32, FieldDescriptorProto::TYPE_INT64, FieldDescriptorProto::TYPE_SINT64, FieldDescriptorProto::TYPE_INT32, FieldDescriptorProto::TYPE_SINT32, FieldDescriptorProto::TYPE_FIXED64, FieldDescriptorProto::TYPE_SFIXED64, FieldDescriptorProto::TYPE_FIXED32, FieldDescriptorProto::TYPE_SFIXED32, FieldDescriptorProto::TYPE_DOUBLE, FieldDescriptorProto::TYPE_FLOAT, }); if (!field) return; const auto& field_type = field->type(); switch (field_type) { case FieldDescriptorProto::TYPE_UINT64: return VarIntField<uint64_t>(field, value); case FieldDescriptorProto::TYPE_UINT32: return VarIntField<uint32_t>(field, value); case FieldDescriptorProto::TYPE_INT64: case FieldDescriptorProto::TYPE_SINT64: return VarIntField<int64_t>(field, value); case FieldDescriptorProto::TYPE_INT32: case FieldDescriptorProto::TYPE_SINT32: return VarIntField<int32_t>(field, value); case FieldDescriptorProto::TYPE_FIXED64: case FieldDescriptorProto::TYPE_SFIXED64: return FixedField<int64_t>(field, value); case FieldDescriptorProto::TYPE_FIXED32: case FieldDescriptorProto::TYPE_SFIXED32: return FixedField<int32_t>(field, value); case FieldDescriptorProto::TYPE_DOUBLE: return FixedFloatField<double>(field, value); case FieldDescriptorProto::TYPE_FLOAT: return FixedFloatField<float>(field, value); case FieldDescriptorProto::TYPE_BOOL: case FieldDescriptorProto::TYPE_STRING: case FieldDescriptorProto::TYPE_BYTES: case FieldDescriptorProto::TYPE_GROUP: case FieldDescriptorProto::TYPE_MESSAGE: case FieldDescriptorProto::TYPE_ENUM: PERFETTO_FATAL("Invalid type"); } } void StringField(Token key, Token value) { const FieldDescriptorProto* field = FindFieldByName( key, value, { FieldDescriptorProto::TYPE_STRING, FieldDescriptorProto::TYPE_BYTES, }); if (!field) return; uint32_t field_id = static_cast<uint32_t>(field->number()); const auto& field_type = field->type(); PERFETTO_CHECK(field_type == FieldDescriptorProto::TYPE_STRING || field_type == FieldDescriptorProto::TYPE_BYTES); std::unique_ptr<char, base::FreeDeleter> s( static_cast<char*>(malloc(value.size()))); size_t j = 0; for (size_t i = 0; i < value.size(); i++) { char c = value.txt.data()[i]; if (c == '\\') { if (i + 1 >= value.size()) { // This should be caught by the lexer. PERFETTO_FATAL("Escape at end of string."); return; } char next = value.txt.data()[++i]; switch (next) { case '\\': case '\'': case '"': case '?': s.get()[j++] = next; break; case 'a': s.get()[j++] = '\a'; break; case 'b': s.get()[j++] = '\b'; break; case 'f': s.get()[j++] = '\f'; break; case 'n': s.get()[j++] = '\n'; break; case 'r': s.get()[j++] = '\r'; break; case 't': s.get()[j++] = '\t'; break; case 'v': s.get()[j++] = '\v'; break; default: AddError(value, "Unknown string escape in $k in " "proto $n: '$v'", std::map<std::string, std::string>{ {"$k", key.ToStdString()}, {"$n", descriptor_name()}, {"$v", value.ToStdString()}, }); return; } } else { s.get()[j++] = c; } } msg()->AppendBytes(field_id, s.get(), j); } void IdentifierField(Token key, Token value) { const FieldDescriptorProto* field = FindFieldByName( key, value, { FieldDescriptorProto::TYPE_BOOL, FieldDescriptorProto::TYPE_ENUM, }); if (!field) return; uint32_t field_id = static_cast<uint32_t>(field->number()); const auto& field_type = field->type(); if (field_type == FieldDescriptorProto::TYPE_BOOL) { if (value.txt != "true" && value.txt != "false") { AddError(value, "Expected 'true' or 'false' for boolean field $k in " "proto $n instead saw '$v'", std::map<std::string, std::string>{ {"$k", key.ToStdString()}, {"$n", descriptor_name()}, {"$v", value.ToStdString()}, }); return; } msg()->AppendTinyVarInt(field_id, value.txt == "true" ? 1 : 0); } else if (field_type == FieldDescriptorProto::TYPE_ENUM) { const std::string& type_name = field->type_name(); const EnumDescriptorProto* enum_descriptor = name_to_enum_[type_name]; PERFETTO_CHECK(enum_descriptor); bool found_value = false; int32_t enum_value_number = 0; for (const EnumValueDescriptorProto& enum_value : enum_descriptor->value()) { if (value.ToStdString() != enum_value.name()) continue; found_value = true; enum_value_number = enum_value.number(); break; } PERFETTO_CHECK(found_value); msg()->AppendVarInt<int32_t>(field_id, enum_value_number); } else { } } void BeginNestedMessage(Token key, Token value) { const FieldDescriptorProto* field = FindFieldByName(key, value, { FieldDescriptorProto::TYPE_MESSAGE, }); if (!field) return; uint32_t field_id = static_cast<uint32_t>(field->number()); const std::string& type_name = field->type_name(); const DescriptorProto* nested_descriptor = name_to_descriptor_[type_name]; PERFETTO_CHECK(nested_descriptor); auto* nested_msg = msg()->BeginNestedMessage<protozero::Message>(field_id); ctx_.push(ParserDelegateContext{nested_descriptor, nested_msg, {}}); } void EndNestedMessage() { msg()->Finalize(); ctx_.pop(); } void Eof() {} void AddError(size_t row, size_t column, const char* fmt, const std::map<std::string, std::string>& args) { reporter_->AddError(row, column, 0, Format(fmt, args)); } void AddError(Token token, const char* fmt, const std::map<std::string, std::string>& args) { reporter_->AddError(token.row, token.column, token.size(), Format(fmt, args)); } private: template <typename T> void VarIntField(const FieldDescriptorProto* field, Token t) { uint32_t field_id = static_cast<uint32_t>(field->number()); uint64_t n = 0; PERFETTO_CHECK(ParseInteger(t.txt, &n)); if (field->type() == FieldDescriptorProto::TYPE_SINT64 || field->type() == FieldDescriptorProto::TYPE_SINT32) { msg()->AppendSignedVarInt<T>(field_id, static_cast<T>(n)); } else { msg()->AppendVarInt<T>(field_id, static_cast<T>(n)); } } template <typename T> void FixedField(const FieldDescriptorProto* field, Token t) { uint32_t field_id = static_cast<uint32_t>(field->number()); uint64_t n = 0; PERFETTO_CHECK(ParseInteger(t.txt, &n)); msg()->AppendFixed<T>(field_id, static_cast<T>(n)); } template <typename T> void FixedFloatField(const FieldDescriptorProto* field, Token t) { uint32_t field_id = static_cast<uint32_t>(field->number()); double n = std::stod(t.ToStdString()); msg()->AppendFixed<T>(field_id, static_cast<T>(n)); } template <typename T> bool ParseInteger(base::StringView s, T* number_ptr) { uint64_t n = 0; PERFETTO_CHECK(sscanf(s.ToStdString().c_str(), "%" PRIu64, &n) == 1); PERFETTO_CHECK(n <= std::numeric_limits<T>::max()); *number_ptr = static_cast<T>(n); return true; } const FieldDescriptorProto* FindFieldByName( Token key, Token value, std::set<FieldDescriptorProto::Type> valid_field_types) { const std::string field_name = key.ToStdString(); const FieldDescriptorProto* field_descriptor = nullptr; for (const auto& f : descriptor()->field()) { if (f.name() == field_name) { field_descriptor = &f; break; } } if (!field_descriptor) { AddError(key, "No field named \"$n\" in proto $p", { {"$n", field_name}, {"$p", descriptor_name()}, }); return nullptr; } bool is_repeated = field_descriptor->label() == FieldDescriptorProto::LABEL_REPEATED; auto it_and_inserted = ctx_.top().seen_fields.emplace(field_name); if (!it_and_inserted.second && !is_repeated) { AddError(key, "Saw non-repeating field '$f' more than once", { {"$f", field_name}, }); } if (!valid_field_types.count(field_descriptor->type())) { AddError(value, "Expected value of type $t for field $k in proto $n " "instead saw '$v'", { {"$t", FieldToTypeName(field_descriptor)}, {"$k", field_name}, {"$n", descriptor_name()}, {"$v", value.ToStdString()}, }); return nullptr; } return field_descriptor; } const DescriptorProto* descriptor() { PERFETTO_CHECK(!ctx_.empty()); return ctx_.top().descriptor; } const std::string& descriptor_name() { return descriptor()->name(); } protozero::Message* msg() { PERFETTO_CHECK(!ctx_.empty()); return ctx_.top().message; } std::stack<ParserDelegateContext> ctx_; ErrorReporter* reporter_; std::map<std::string, const DescriptorProto*> name_to_descriptor_; std::map<std::string, const EnumDescriptorProto*> name_to_enum_; }; void Parse(const std::string& input, ParserDelegate* delegate) { ParseState state = kWaitingForKey; size_t column = 0; size_t row = 1; size_t depth = 0; bool saw_colon_for_this_key = false; bool saw_semicolon_for_this_value = true; bool comment_till_eol = false; Token key{}; Token value{}; for (size_t i = 0; i < input.size(); i++, column++) { bool last_character = i + 1 == input.size(); char c = input.at(i); if (c == '\n') { column = 0; row++; if (comment_till_eol) { comment_till_eol = false; continue; } } if (comment_till_eol) continue; switch (state) { case kWaitingForKey: if (isspace(c)) continue; if (c == '#') { comment_till_eol = true; continue; } if (c == '}') { if (depth == 0) { delegate->AddError(row, column, "Unmatched closing brace", {}); return; } saw_semicolon_for_this_value = false; depth--; delegate->EndNestedMessage(); continue; } if (!saw_semicolon_for_this_value && c == ';') { saw_semicolon_for_this_value = true; continue; } if (IsIdentifierStart(c)) { saw_colon_for_this_key = false; state = kReadingKey; key.offset = i; key.row = row; key.column = column; continue; } break; case kReadingKey: if (IsIdentifierBody(c)) continue; key.txt = base::StringView(input.data() + key.offset, i - key.offset); state = kWaitingForValue; if (c == '#') comment_till_eol = true; continue; case kWaitingForValue: if (isspace(c)) continue; if (c == '#') { comment_till_eol = true; continue; } value.offset = i; value.row = row; value.column = column; if (c == ':' && !saw_colon_for_this_key) { saw_colon_for_this_key = true; continue; } if (c == '"') { state = kReadingStringValue; continue; } if (c == '-' || isdigit(c)) { state = kReadingNumericValue; continue; } if (IsIdentifierStart(c)) { state = kReadingIdentifierValue; continue; } if (c == '{') { state = kWaitingForKey; depth++; value.txt = base::StringView(input.data() + value.offset, 1); delegate->BeginNestedMessage(key, value); continue; } break; case kReadingNumericValue: if (isspace(c) || c == ';' || last_character) { size_t size = i - value.offset + (last_character ? 1 : 0); value.txt = base::StringView(input.data() + value.offset, size); saw_semicolon_for_this_value = c == ';'; state = kWaitingForKey; delegate->NumericField(key, value); continue; } if (isdigit(c)) continue; break; case kReadingStringValue: if (c == '\\') { state = kReadingStringEscape; } else if (c == '"') { size_t size = i - value.offset - 1; value.column++; value.txt = base::StringView(input.data() + value.offset + 1, size); saw_semicolon_for_this_value = false; state = kWaitingForKey; delegate->StringField(key, value); } continue; case kReadingStringEscape: state = kReadingStringValue; continue; case kReadingIdentifierValue: if (isspace(c) || c == ';' || c == '#' || last_character) { size_t size = i - value.offset + (last_character ? 1 : 0); value.txt = base::StringView(input.data() + value.offset, size); comment_till_eol = c == '#'; saw_semicolon_for_this_value = c == ';'; state = kWaitingForKey; delegate->IdentifierField(key, value); continue; } if (IsIdentifierBody(c)) { continue; } break; } PERFETTO_FATAL("Unexpected char %c", c); } // for if (depth > 0) delegate->AddError(row, column, "Nested message not closed", {}); if (state != kWaitingForKey) delegate->AddError(row, column, "Unexpected end of input", {}); delegate->Eof(); } void AddNestedDescriptors( const std::string& prefix, const DescriptorProto* descriptor, std::map<std::string, const DescriptorProto*>* name_to_descriptor, std::map<std::string, const EnumDescriptorProto*>* name_to_enum) { for (const EnumDescriptorProto& enum_descriptor : descriptor->enum_type()) { const std::string name = prefix + "." + enum_descriptor.name(); (*name_to_enum)[name] = &enum_descriptor; } for (const DescriptorProto& nested_descriptor : descriptor->nested_type()) { const std::string name = prefix + "." + nested_descriptor.name(); (*name_to_descriptor)[name] = &nested_descriptor; AddNestedDescriptors(name, &nested_descriptor, name_to_descriptor, name_to_enum); } } } // namespace ErrorReporter::ErrorReporter() = default; ErrorReporter::~ErrorReporter() = default; std::vector<uint8_t> PbtxtToPb(const std::string& input, ErrorReporter* reporter) { std::map<std::string, const DescriptorProto*> name_to_descriptor; std::map<std::string, const EnumDescriptorProto*> name_to_enum; FileDescriptorSet file_descriptor_set; { file_descriptor_set.ParseFromArray( kPerfettoConfigDescriptor.data(), static_cast<int>(kPerfettoConfigDescriptor.size())); for (const auto& file_descriptor : file_descriptor_set.file()) { for (const auto& enum_descriptor : file_descriptor.enum_type()) { const std::string name = "." + file_descriptor.package() + "." + enum_descriptor.name(); name_to_enum[name] = &enum_descriptor; } for (const auto& descriptor : file_descriptor.message_type()) { const std::string name = "." + file_descriptor.package() + "." + descriptor.name(); name_to_descriptor[name] = &descriptor; AddNestedDescriptors(name, &descriptor, &name_to_descriptor, &name_to_enum); } } } const DescriptorProto* descriptor = name_to_descriptor[kConfigProtoName]; PERFETTO_CHECK(descriptor); protozero::HeapBuffered<protozero::Message> message; ParserDelegate delegate(descriptor, message.get(), reporter, std::move(name_to_descriptor), std::move(name_to_enum)); Parse(input, &delegate); return message.SerializeAsArray(); } } // namespace perfetto