/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "RouteController.h"

#include "Fwmark.h"
#include "UidRanges.h"

#define LOG_TAG "Netd"
#include "log/log.h"
#include "logwrap/logwrap.h"
#include "resolv_netid.h"

#include <arpa/inet.h>
#include <fcntl.h>
#include <linux/fib_rules.h>
#include <map>
#include <net/if.h>
#include <sys/stat.h>

namespace {

// BEGIN CONSTANTS --------------------------------------------------------------------------------

// Priorities passed to modifyIpRule() for each kind of routing policy rule this file installs.
constexpr uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM  = 10000;
constexpr uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL  = 11000;
constexpr uint32_t RULE_PRIORITY_SECURE_VPN           = 12000;
constexpr uint32_t RULE_PRIORITY_EXPLICIT_NETWORK     = 13000;
constexpr uint32_t RULE_PRIORITY_OUTPUT_INTERFACE     = 14000;
constexpr uint32_t RULE_PRIORITY_LEGACY_SYSTEM        = 15000;
constexpr uint32_t RULE_PRIORITY_LEGACY_NETWORK       = 16000;
constexpr uint32_t RULE_PRIORITY_LOCAL_NETWORK        = 17000;
constexpr uint32_t RULE_PRIORITY_TETHERING            = 18000;
constexpr uint32_t RULE_PRIORITY_IMPLICIT_NETWORK     = 19000;
constexpr uint32_t RULE_PRIORITY_BYPASSABLE_VPN       = 20000;
constexpr uint32_t RULE_PRIORITY_VPN_FALLTHROUGH      = 21000;
constexpr uint32_t RULE_PRIORITY_DEFAULT_NETWORK      = 22000;
constexpr uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED   = 23000;
constexpr uint32_t RULE_PRIORITY_UNREACHABLE          = 32000;

// Fixed routing table numbers that are not derived from an interface index.
constexpr uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
constexpr uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
constexpr uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Human-readable names written to the rt_tables file next to the table numbers.
constexpr const char* ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

constexpr const char* ROUTE_TABLE_NAME_LOCAL = "local";
constexpr const char* ROUTE_TABLE_NAME_MAIN  = "main";

// TODO: The Linux kernel does not define these values, because our UID routing changes have not
// been upstreamed (yet?), so they cannot come from the kernel headers. If the changes ever land
// upstream, drop these and use the kernel header values instead. Until then, the static assert
// below warns us if upstream assigns some other meaning to these numbers.
constexpr uint16_t FRA_UID_START = 18;
constexpr uint16_t FRA_UID_END   = 19;
static_assert(FRA_UID_START > FRA_MAX,
             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
             "Check that these values match what the kernel does and then update this assertion.");

// Netlink header flags: every request asks for an ack; "create" requests must not already exist.
constexpr uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
constexpr uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;

// Address the netlink socket is connected to before a request is written.
const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};

// Address families (for netlink) and the matching "ip" command-line switches.
constexpr uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};

constexpr const char* IP_VERSIONS[] = {"-4", "-6"};

constexpr uid_t UID_ROOT = 0;

// Sentinels meaning "do not match on an incoming / outgoing interface".
constexpr const char* IIF_NONE = nullptr;
constexpr const char* OIF_NONE = nullptr;

// Readable names for the boolean "add" arguments used throughout this file.
constexpr bool ACTION_ADD = true;
constexpr bool ACTION_DEL = false;
constexpr bool MODIFY_NON_UID_BASED_RULES = true;

// Location, open(2) flags and permission bits of the table-names file.
constexpr const char* RT_TABLES_PATH = "/data/misc/net/rt_tables";
constexpr int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
constexpr mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--

// Upper bound on how many times a route flush is attempted before giving up.
constexpr unsigned ROUTE_FLUSH_ATTEMPTS = 2;

// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
constexpr uint16_t U16_RTA_LENGTH(uint16_t x) { return RTA_LENGTH(x); } // These are practically const, but can't be declared so, because they are used to initialize // non-const pointers ("void* iov_base") in iovec arrays. rtattr FRATTR_PRIORITY = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY }; rtattr FRATTR_TABLE = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE }; rtattr FRATTR_FWMARK = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK }; rtattr FRATTR_FWMASK = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK }; rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)), FRA_UID_START }; rtattr FRATTR_UID_END = { U16_RTA_LENGTH(sizeof(uid_t)), FRA_UID_END }; rtattr RTATTR_TABLE = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE }; rtattr RTATTR_OIF = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF }; uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0}; // END CONSTANTS ---------------------------------------------------------------------------------- // No locks needed because RouteController is accessed only from one thread (in CommandListener). std::map<std::string, uint32_t> interfaceToTable; uint32_t getRouteTableForInterface(const char* interface) { uint32_t index = if_nametoindex(interface); if (index) { index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX; interfaceToTable[interface] = index; return index; } // If the interface goes away if_nametoindex() will return 0 but we still need to know // the index so we can remove the rules and routes. 
auto iter = interfaceToTable.find(interface); if (iter == interfaceToTable.end()) { ALOGE("cannot find interface %s", interface); return RT_TABLE_UNSPEC; } return iter->second; } void addTableName(uint32_t table, const std::string& name, std::string* contents) { char tableString[UINT32_STRLEN]; snprintf(tableString, sizeof(tableString), "%u", table); *contents += tableString; *contents += " "; *contents += name; *contents += "\n"; } // Doesn't return success/failure as the file is optional; it's okay if we fail to update it. void updateTableNamesFile() { std::string contents; addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents); addTableName(RT_TABLE_MAIN, ROUTE_TABLE_NAME_MAIN, &contents); addTableName(ROUTE_TABLE_LOCAL_NETWORK, ROUTE_TABLE_NAME_LOCAL_NETWORK, &contents); addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents); addTableName(ROUTE_TABLE_LEGACY_SYSTEM, ROUTE_TABLE_NAME_LEGACY_SYSTEM, &contents); for (const auto& entry : interfaceToTable) { addTableName(entry.second, entry.first, &contents); } int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE); if (fd == -1) { ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno)); return; } // File creation is affected by umask, so make sure the right mode bits are set. if (fchmod(fd, RT_TABLES_MODE) == -1) { ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno)); } ssize_t bytesWritten = write(fd, contents.data(), contents.size()); if (bytesWritten != static_cast<ssize_t>(contents.size())) { ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten, contents.size(), strerror(errno)); } close(fd); } // Sends a netlink request and expects an ack. // |iov| is an array of struct iovec that contains the netlink message payload. // The netlink header is generated by this function based on |action| and |flags|. // Returns -errno if there was an error or if the kernel reported an error. 
WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) { nlmsghdr nlmsg = { .nlmsg_type = action, .nlmsg_flags = flags, }; iov[0].iov_base = &nlmsg; iov[0].iov_len = sizeof(nlmsg); for (int i = 0; i < iovlen; ++i) { nlmsg.nlmsg_len += iov[i].iov_len; } int ret; struct { nlmsghdr msg; nlmsgerr err; } response; int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); if (sock != -1 && connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS), sizeof(NETLINK_ADDRESS)) != -1 && writev(sock, iov, iovlen) != -1 && (ret = recv(sock, &response, sizeof(response), 0)) != -1) { if (ret == sizeof(response)) { ret = response.err.error; // Netlink errors are negative errno. if (ret) { ALOGE("netlink response contains error (%s)", strerror(-ret)); } } else { ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response)); ret = -EBADMSG; } } else { ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno)); ret = -errno; } if (sock != -1) { close(sock); } return ret; } // Returns 0 on success or negative errno on failure. int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) { if (!input) { *length = 0; *padding = 0; return 0; } *length = strlcpy(name, input, IFNAMSIZ) + 1; if (*length > IFNAMSIZ) { ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ); return -ENAMETOOLONG; } *padding = RTA_SPACE(*length) - RTA_LENGTH(*length); return 0; } // Adds or removes a routing rule for IPv4 and IPv6. // // + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule // returns ENETUNREACH. // + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is // ignored. // + If |iif| is non-NULL, the rule matches the specified incoming interface. // + If |oif| is non-NULL, the rule matches the specified outgoing interface. 
// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that // range (inclusive). Otherwise, the rule matches packets from all UIDs. // // Returns 0 on success or negative errno on failure. WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table, uint32_t fwmark, uint32_t mask, const char* iif, const char* oif, uid_t uidStart, uid_t uidEnd) { // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask. if (fwmark & ~mask) { ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark); return -ERANGE; } // Interface names must include exactly one terminating NULL and be properly padded, or older // kernels will refuse to delete rules. char iifName[IFNAMSIZ], oifName[IFNAMSIZ]; size_t iifLength, oifLength; uint16_t iifPadding, oifPadding; if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) { return ret; } if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) { return ret; } // Either both start and end UID must be specified, or neither. if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) { ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd); return -EUSERS; } bool isUidRule = (uidStart != INVALID_UID); // Assemble a rule request and put it in an array of iovec structures. fib_rule_hdr rule = { .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL : FR_ACT_UNREACHABLE), }; rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME }; rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME }; iovec iov[] = { { NULL, 0 }, { &rule, sizeof(rule) }, { &FRATTR_PRIORITY, sizeof(FRATTR_PRIORITY) }, { &priority, sizeof(priority) }, { &FRATTR_TABLE, table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 }, { &table, table != RT_TABLE_UNSPEC ? sizeof(table) : 0 }, { &FRATTR_FWMARK, mask ? sizeof(FRATTR_FWMARK) : 0 }, { &fwmark, mask ? sizeof(fwmark) : 0 }, { &FRATTR_FWMASK, mask ? 
sizeof(FRATTR_FWMASK) : 0 }, { &mask, mask ? sizeof(mask) : 0 }, { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 }, { &uidStart, isUidRule ? sizeof(uidStart) : 0 }, { &FRATTR_UID_END, isUidRule ? sizeof(FRATTR_UID_END) : 0 }, { &uidEnd, isUidRule ? sizeof(uidEnd) : 0 }, { &fraIifName, iif != IIF_NONE ? sizeof(fraIifName) : 0 }, { iifName, iifLength }, { PADDING_BUFFER, iifPadding }, { &fraOifName, oif != OIF_NONE ? sizeof(fraOifName) : 0 }, { oifName, oifLength }, { PADDING_BUFFER, oifPadding }, }; uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS; for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) { rule.family = AF_FAMILIES[i]; if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) { return ret; } } return 0; } WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table, uint32_t fwmark, uint32_t mask) { return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID, INVALID_UID); } // Adds or deletes an IPv4 or IPv6 route. // Returns 0 on success or negative errno on failure. WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface, const char* destination, const char* nexthop) { // At least the destination must be non-null. if (!destination) { ALOGE("null destination"); return -EFAULT; } // Parse the prefix. uint8_t rawAddress[sizeof(in6_addr)]; uint8_t family; uint8_t prefixLength; int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress), &prefixLength); if (rawLength < 0) { ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength)); return rawLength; } if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) { ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress)); return -ENOBUFS; // Cannot happen; parsePrefix only supports IPv4 and IPv6. 
} uint8_t type = RTN_UNICAST; uint32_t ifindex; uint8_t rawNexthop[sizeof(in6_addr)]; if (nexthop && !strcmp(nexthop, "unreachable")) { type = RTN_UNREACHABLE; // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.) interface = OIF_NONE; nexthop = NULL; } else if (nexthop && !strcmp(nexthop, "throw")) { type = RTN_THROW; interface = OIF_NONE; nexthop = NULL; } else { // If an interface was specified, find the ifindex. if (interface != OIF_NONE) { ifindex = if_nametoindex(interface); if (!ifindex) { ALOGE("cannot find interface %s", interface); return -ENODEV; } } // If a nexthop was specified, parse it as the same family as the prefix. if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) { ALOGE("inet_pton failed for nexthop %s", nexthop); return -EINVAL; } } // Assemble a rtmsg and put it in an array of iovec structures. rtmsg route = { .rtm_protocol = RTPROT_STATIC, .rtm_type = type, .rtm_family = family, .rtm_dst_len = prefixLength, .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK), }; rtattr rtaDst = { U16_RTA_LENGTH(rawLength), RTA_DST }; rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY }; iovec iov[] = { { NULL, 0 }, { &route, sizeof(route) }, { &RTATTR_TABLE, sizeof(RTATTR_TABLE) }, { &table, sizeof(table) }, { &rtaDst, sizeof(rtaDst) }, { rawAddress, static_cast<size_t>(rawLength) }, { &RTATTR_OIF, interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 }, { &ifindex, interface != OIF_NONE ? sizeof(ifindex) : 0 }, { &rtaGateway, nexthop ? sizeof(rtaGateway) : 0 }, { rawNexthop, nexthop ? static_cast<size_t>(rawLength) : 0 }, }; uint16_t flags = (action == RTM_NEWROUTE) ? 
NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS; return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov)); } // An iptables rule to mark incoming packets on a network with the netId of the network. // // This is so that the kernel can: // + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping // replies, SYN-ACKs, etc). // + Mark sockets that accept connections from this interface so that the connection stays on the // same interface. WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface, Permission permission, bool add) { Fwmark fwmark; fwmark.netId = netId; fwmark.explicitlySelected = true; fwmark.protectedFromVpn = true; fwmark.permission = permission; char markString[UINT32_HEX_STRLEN]; snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue); if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j", "MARK", "--set-mark", markString, NULL)) { ALOGE("failed to change iptables rule that sets incoming packet mark"); return -EREMOTEIO; } return 0; } // A rule to route responses to the local network forwarded via the VPN. // // When a VPN is in effect, packets from the local network to upstream networks are forwarded into // the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel. WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) { return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL, ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE, INVALID_UID, INVALID_UID); } // A rule to route all traffic from a given set of UIDs to go over the VPN. // // Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may // have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to // bypass the VPN if the protectedFromVpn bit is set. 
WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
                                             bool secure, bool add) {
    Fwmark markValue;
    Fwmark markMask;

    // Always require the protectedFromVpn bit to be clear. For a bypassable (non-secure) VPN,
    // additionally require that no network was explicitly selected.
    markValue.protectedFromVpn = false;
    if (!secure) {
        markValue.explicitlySelected = false;
    }

    markMask.protectedFromVpn = true;
    if (!secure) {
        markMask.explicitlySelected = true;
    }

    const uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;

    return modifyIpRule(action, priority, table, markValue.intValue, markMask.intValue, IIF_NONE,
                        OIF_NONE, uidStart, uidEnd);
}

// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
// set of UIDs.
//
// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
// target set, but where the DnsProxyListener itself is not.
WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
                                                     bool add) {
    Fwmark markValue;
    Fwmark markMask;

    // Match the VPN's netId together with the system permission.
    markValue.netId = netId;
    markValue.permission = PERMISSION_SYSTEM;

    markMask.netId = FWMARK_NET_ID_MASK;
    markMask.permission = PERMISSION_SYSTEM;

    uint32_t priority;
    if (secure) {
        priority = RULE_PRIORITY_SECURE_VPN;
    } else {
        priority = RULE_PRIORITY_BYPASSABLE_VPN;
    }

    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;
    return modifyIpRule(action, priority, table, markValue.intValue, markMask.intValue);
}

// A rule to route traffic based on an explicitly chosen network.
//
// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
//
// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
// to check it again in the rules here, because a network's permissions may have been updated via
// modifyNetworkPermission().
WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
                                                 Permission permission, uid_t uidStart,
                                                 uid_t uidEnd, bool add) {
    Fwmark markValue;
    Fwmark markMask;

    // Match sockets that explicitly selected |netId| and carry |permission|.
    markValue.netId = netId;
    markValue.explicitlySelected = true;
    markValue.permission = permission;

    markMask.netId = FWMARK_NET_ID_MASK;
    markMask.explicitlySelected = true;
    markMask.permission = permission;

    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;
    return modifyIpRule(action, RULE_PRIORITY_EXPLICIT_NETWORK, table, markValue.intValue,
                        markMask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
}

// A rule to route traffic based on a chosen outgoing interface.
//
// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
// the outgoing interface (typically for link-local communications).
WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
                                                 Permission permission, uid_t uidStart,
                                                 uid_t uidEnd, bool add) {
    Fwmark markValue;
    Fwmark markMask;

    // Only the permission bits take part in the match; the interface is matched as the oif.
    markValue.permission = permission;
    markMask.permission = permission;

    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;
    return modifyIpRule(action, RULE_PRIORITY_OUTPUT_INTERFACE, table, markValue.intValue,
                        markMask.intValue, IIF_NONE, interface, uidStart, uidEnd);
}

// A rule to route traffic based on the chosen network.
//
// This is for sockets that have not explicitly requested a particular network, but have been
// bound to one when they called connect(). This ensures that sockets connected on a particular
// network stay on that network even if the default network changes.
WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
                                                 Permission permission, bool add) {
    Fwmark markValue;
    Fwmark markMask;

    // Match sockets marked with |netId| that did not explicitly select it.
    markValue.netId = netId;
    markValue.explicitlySelected = false;
    markValue.permission = permission;

    markMask.netId = FWMARK_NET_ID_MASK;
    markMask.explicitlySelected = true;
    markMask.permission = permission;

    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;
    return modifyIpRule(action, RULE_PRIORITY_IMPLICIT_NETWORK, table, markValue.intValue,
                        markMask.intValue);
}

// A rule to enable split tunnel VPNs.
// // If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to // go over the default network, provided it wasn't explicitly restricted to the VPN and has the // permissions required by the default network. WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId, const char* physicalInterface, Permission permission) { uint32_t table = getRouteTableForInterface(physicalInterface); if (table == RT_TABLE_UNSPEC) { return -ESRCH; } Fwmark fwmark; Fwmark mask; fwmark.netId = vpnNetId; mask.netId = FWMARK_NET_ID_MASK; fwmark.explicitlySelected = false; mask.explicitlySelected = true; fwmark.permission = permission; mask.permission = permission; return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue, mask.intValue); } // Add rules to allow legacy routes added through the requestRouteToHost() API. WARN_UNUSED_RESULT int addLegacyRouteRules() { Fwmark fwmark; Fwmark mask; fwmark.explicitlySelected = false; mask.explicitlySelected = true; // Rules to allow legacy routes to override the default network. if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM, fwmark.intValue, mask.intValue)) { return ret; } if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK, ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) { return ret; } fwmark.permission = PERMISSION_SYSTEM; mask.permission = PERMISSION_SYSTEM; // A rule to allow legacy routes from system apps to override VPNs. return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM, fwmark.intValue, mask.intValue); } // Add rules to lookup the local network when specified explicitly or otherwise. 
WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
    // First, the rule for sockets that explicitly selected the local network.
    const int explicitResult = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK,
                                                         PERMISSION_NONE, INVALID_UID,
                                                         INVALID_UID, ACTION_ADD);
    if (explicitResult != 0) {
        return explicitResult;
    }

    // Then, the rule for sockets that did not explicitly select any network.
    Fwmark markValue;
    Fwmark markMask;

    markValue.explicitlySelected = false;
    markMask.explicitlySelected = true;

    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
                        markValue.intValue, markMask.intValue);
}

// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
// rule, but with a lower priority. We will never create routes in the main table; it should only be
// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
// This is necessary, for example, when adding a route through a directly-connected gateway: in
// order to add the route, there must already be a directly-connected route that covers the gateway.
WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
    Fwmark markValue;
    Fwmark markMask;

    markValue.netId = NETID_UNSET;
    markMask.netId = FWMARK_NET_ID_MASK;

    // The rule is restricted to the UID range [UID_ROOT, UID_ROOT].
    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
                        markValue.intValue, markMask.intValue, IIF_NONE, OIF_NONE, UID_ROOT,
                        UID_ROOT);
}

// Add an explicit unreachable rule close to the end of the priority list to make it clear that
// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
// rule will hopefully make things even clearer.
WARN_UNUSED_RESULT int addUnreachableRule() {
    // RT_TABLE_UNSPEC as the table makes modifyIpRule() create an "unreachable" rule.
    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
                        MARK_UNSET);
}

// Adds or removes the incoming-packet mark and the output-interface rule that tie |interface| to
// the local network. Returns 0 on success or negative errno on failure.
WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
    const int markResult = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add);
    if (markResult != 0) {
        return markResult;
    }
    return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
                                     INVALID_UID, INVALID_UID, add);
}

// Adds or removes every rule that ties |interface| to the physical network |netId|: the
// incoming-packet mark plus the explicit, output-interface and implicit network rules.
// Returns 0 on success or negative errno on failure; stops at the first failure.
WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
                                             Permission permission, bool add) {
    const uint32_t table = getRouteTableForInterface(interface);
    if (table == RT_TABLE_UNSPEC) {
        return -ESRCH;
    }

    int ret = modifyIncomingPacketMark(netId, interface, permission, add);
    if (ret != 0) {
        return ret;
    }

    ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID, add);
    if (ret != 0) {
        return ret;
    }

    ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID, add);
    if (ret != 0) {
        return ret;
    }

    return modifyImplicitNetworkRule(netId, table, permission, add);
}

// Adds or removes the rules for a virtual (VPN) network. The per-UID-range rules are always
// processed; the rules that do not depend on UIDs only when |modifyNonUidBasedRules| is set.
// Returns 0 on success or negative errno on failure; stops at the first failure.
WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
                                            const UidRanges& uidRanges, bool secure, bool add,
                                            bool modifyNonUidBasedRules) {
    const uint32_t table = getRouteTableForInterface(interface);
    if (table == RT_TABLE_UNSPEC) {
        return -ESRCH;
    }

    for (const UidRanges::Range& range : uidRanges.getRanges()) {
        const uid_t first = range.first;
        const uid_t last = range.second;

        int ret = modifyVpnUidRangeRule(table, first, last, secure, add);
        if (ret != 0) {
            return ret;
        }
        ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, first, last, add);
        if (ret != 0) {
            return ret;
        }
        ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, first, last, add);
        if (ret != 0) {
            return ret;
        }
    }

    if (!modifyNonUidBasedRules) {
        return 0;
    }

    int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add);
    if (ret != 0) {
        return ret;
    }
    ret = modifyVpnOutputToLocalRule(interface, add);
    if (ret != 0) {
        return ret;
    }
    ret = modifyVpnSystemPermissionRule(netId, table, secure, add);
    if (ret != 0) {
        return ret;
    }
    return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
}

// Adds or removes (per |action|) the rule that sends traffic with no netId set to the table of
// |interface|. Returns 0 on success or negative errno on failure.
WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
                                            Permission permission) {
    const uint32_t table = getRouteTableForInterface(interface);
    if (table == RT_TABLE_UNSPEC) {
        return -ESRCH;
    }

    Fwmark markValue;
    Fwmark markMask;

    markValue.netId = NETID_UNSET;
    markValue.permission = permission;

    markMask.netId = FWMARK_NET_ID_MASK;
    markMask.permission = permission;

    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, markValue.intValue,
                        markMask.intValue);
}

// Adds or removes (per |action|) the rule that sends packets arriving on |inputInterface| to the
// routing table of |outputInterface|. Returns 0 on success or negative errno on failure.
WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
                                             const char* outputInterface) {
    const uint32_t table = getRouteTableForInterface(outputInterface);
    if (table == RT_TABLE_UNSPEC) {
        return -ESRCH;
    }

    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
}

// Runs "ip rule flush" once for each entry of IP_VERSIONS.
// Returns 0 on success or negative errno on failure.
WARN_UNUSED_RESULT int flushRules() {
    for (const char* ipVersion : IP_VERSIONS) {
        const char* argv[] = {
            IP_PATH,
            ipVersion,
            "rule",
            "flush",
        };
        const int status = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL,
                                               false, false);
        if (status != 0) {
            ALOGE("failed to flush rules");
            return -EREMOTEIO;
        }
    }
    return 0;
}

// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
// route, to the main table as well.
// Returns 0 on success or negative errno on failure.
WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination, const char* nexthop, RouteController::TableType tableType) { uint32_t table; switch (tableType) { case RouteController::INTERFACE: { table = getRouteTableForInterface(interface); if (table == RT_TABLE_UNSPEC) { return -ESRCH; } break; } case RouteController::LOCAL_NETWORK: { table = ROUTE_TABLE_LOCAL_NETWORK; break; } case RouteController::LEGACY_NETWORK: { table = ROUTE_TABLE_LEGACY_NETWORK; break; } case RouteController::LEGACY_SYSTEM: { table = ROUTE_TABLE_LEGACY_SYSTEM; break; } } int ret = modifyIpRoute(action, table, interface, destination, nexthop); // We allow apps to call requestRouteToHost() multiple times with the same route, so ignore // EEXIST failures when adding routes to legacy tables. if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST && (tableType == RouteController::LEGACY_NETWORK || tableType == RouteController::LEGACY_SYSTEM))) { return ret; } return 0; } // Returns 0 on success or negative errno on failure. WARN_UNUSED_RESULT int flushRoutes(const char* interface) { uint32_t table = getRouteTableForInterface(interface); if (table == RT_TABLE_UNSPEC) { return -ESRCH; } char tableString[UINT32_STRLEN]; snprintf(tableString, sizeof(tableString), "%u", table); int ret = 0; for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) { const char* argv[] = { IP_PATH, IP_VERSIONS[i], "route", "flush", "table", tableString, }; // A flush works by dumping routes and deleting each route as it's returned, and it can // fail if something else deletes the route between the dump and the delete. This can // happen, for example, if an interface goes down while we're trying to flush its routes. // So try multiple times and only return an error if the last attempt fails. // // TODO: replace this with our own netlink code. 
unsigned attempts = 0; int err; do { err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false); ++attempts; } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS); if (err) { ALOGE("failed to flush %s routes in table %s after %d attempts", IP_VERSIONS[i], tableString, attempts); ret = -EREMOTEIO; } } // If we failed to flush routes, the caller may elect to keep this interface around, so keep // track of its name. if (!ret) { interfaceToTable.erase(interface); } return ret; } } // namespace int RouteController::Init(unsigned localNetId) { if (int ret = flushRules()) { return ret; } if (int ret = addLegacyRouteRules()) { return ret; } if (int ret = addLocalNetworkRules(localNetId)) { return ret; } if (int ret = addDirectlyConnectedRule()) { return ret; } if (int ret = addUnreachableRule()) { return ret; } updateTableNamesFile(); return 0; } int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) { return modifyLocalNetwork(netId, interface, ACTION_ADD); } int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) { return modifyLocalNetwork(netId, interface, ACTION_DEL); } int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface, Permission permission) { if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) { return ret; } updateTableNamesFile(); return 0; } int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface, Permission permission) { if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) { return ret; } if (int ret = flushRoutes(interface)) { return ret; } updateTableNamesFile(); return 0; } int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface, bool secure, const UidRanges& uidRanges) { if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD, MODIFY_NON_UID_BASED_RULES)) { return ret; } 
updateTableNamesFile(); return 0; } int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface, bool secure, const UidRanges& uidRanges) { if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL, MODIFY_NON_UID_BASED_RULES)) { return ret; } if (int ret = flushRoutes(interface)) { return ret; } updateTableNamesFile(); return 0; } int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface, Permission oldPermission, Permission newPermission) { // Add the new rules before deleting the old ones, to avoid race conditions. if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) { return ret; } return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL); } int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure, const UidRanges& uidRanges) { return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD, !MODIFY_NON_UID_BASED_RULES); } int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface, bool secure, const UidRanges& uidRanges) { return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL, !MODIFY_NON_UID_BASED_RULES); } int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) { return modifyDefaultNetwork(RTM_NEWRULE, interface, permission); } int RouteController::removeInterfaceFromDefaultNetwork(const char* interface, Permission permission) { return modifyDefaultNetwork(RTM_DELRULE, interface, permission); } int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop, TableType tableType) { return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType); } int RouteController::removeRoute(const char* interface, const char* destination, const char* nexthop, TableType tableType) { return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, 
tableType); } int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) { return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface); } int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) { return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface); } int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface, Permission permission) { return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission); } int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface, Permission permission) { return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission); }