Kernel  |  3.14

下载     查看原文件
C++程序  |  353行  |  9.64 KB
/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef __XT_QTAGUID_INTERNAL_H__
#define __XT_QTAGUID_INTERNAL_H__

#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>

/*
 * Debug category bits. MSK_DEBUG() tests one of these against
 * qtaguid_debug_mask at run time to decide whether a message is printed.
 */
/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/*
 * Default mask value: 0 selects none of the categories above.
 * A non-zero example: (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK)
 * NOTE(review): presumably the initial value of qtaguid_debug_mask --
 * confirm where that variable is defined.
 */
#define DEFAULT_DEBUG_MASK 0

/*
 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 *
 * Each symbol gates one wrapper macro defined later in this header:
 *   IDEBUG -> IF_DEBUG(), MDEBUG -> MT_DEBUG(), RDEBUG -> RB_DEBUG(),
 *   CDEBUG -> CT_DEBUG(), DDEBUG -> DR_DEBUG().
 * When a symbol is undefined, its wrapper expands to no_printk() instead of
 * MSK_DEBUG().
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

/*
 * MSK_DEBUG(mask, fmt, ...): forward the trailing arguments to pr_debug()
 * only when at least one bit of @mask is set in qtaguid_debug_mask.
 * The test is wrapped in unlikely(), and the whole expansion in
 * do { } while (0) so that it behaves as a single statement.
 */
#define MSK_DEBUG(mask, ...) do {                           \
		if (unlikely(qtaguid_debug_mask & (mask)))  \
			pr_debug(__VA_ARGS__);              \
	} while (0)
/*
 * Per-category wrappers. Each one is MSK_DEBUG() with a fixed mask when its
 * compile-time switch is defined, and no_printk() otherwise.
 */
/* IF_DEBUG: IDEBUG_MASK category, gated by IDEBUG. */
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* MT_DEBUG: MDEBUG_MASK category, gated by MDEBUG. */
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* RB_DEBUG: RDEBUG_MASK category, gated by RDEBUG. */
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* CT_DEBUG: CDEBUG_MASK category, gated by CDEBUG. */
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* DR_DEBUG: DDEBUG_MASK category, gated by DDEBUG. */
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/*
 * Run-time debug mask read by MSK_DEBUG(). Only declared here; the
 * definition lives outside this header.
 */
extern uint qtaguid_debug_mask;

/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and can not be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
/*
 * 64-bit tag value. Callers must treat it as opaque and go through the
 * helpers below rather than doing integer arithmetic on it.
 */
typedef uint64_t tag_t;

/* The low 32 bits carry the uid_tag, the high 32 bits carry the acct_tag. */
#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

/*
 * Three-way comparison of two tags.
 * Returns -1 when t1 < t2, 0 when they are equal and 1 when t1 > t2.
 */
static inline int tag_compare(tag_t t1, tag_t t2)
{
	if (t1 < t2)
		return -1;
	if (t1 > t2)
		return 1;
	return 0;
}

/*
 * Build a full tag from an acct_tag and a uid by OR-ing the two together.
 * No masking is applied to either input.
 */
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	tag_t uid_part = uid;

	return uid_part | acct_tag;
}

/* Widen a uid into a tag that has no acct_tag portion set. */
static inline tag_t make_tag_from_uid(uid_t uid)
{
	tag_t utag = uid;

	return utag;
}

/* Extract the uid_tag portion of a tag, converted to uid_t. */
static inline uid_t get_uid_from_tag(tag_t tag)
{
	tag_t uid_bits = tag & TAG_UID_MASK;

	return uid_bits;
}

/* Extract the uid_tag portion of a tag, still typed as a tag_t. */
static inline tag_t get_utag_from_tag(tag_t tag)
{
	tag_t uid_bits = tag & TAG_UID_MASK;

	return uid_bits;
}

/* Extract the acct_tag portion of a tag (uid_tag bits cleared). */
static inline tag_t get_atag_from_tag(tag_t tag)
{
	tag_t acct_bits = tag & TAG_ACCT_MASK;

	return acct_bits;
}

/* An acct_tag is valid only when none of its uid_tag bits are set. */
static inline bool valid_atag(tag_t tag)
{
	return (tag & TAG_UID_MASK) == 0;
}

/* Place a 32-bit value into the acct_tag (upper 32 bits) position. */
static inline tag_t make_atag_from_value(uint32_t value)
{
	tag_t atag = value;

	return atag << 32;
}
/*---------------------------------------------------------------------------*/

/*
 * Default cap on the number of socket tags that a single UID is allowed to
 * have active at the same time.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 */
#define DEFAULT_MAX_SOCK_TAGS 1024

/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

/* Traffic direction; IFS_MAX_DIRECTIONS is the number of directions. */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/*
 * Protocol buckets. For now, TCP, UDP, the rest.
 * IFS_MAX_PROTOS is the number of buckets and must stay last.
 */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

/* One byte counter paired with one packet counter. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

/* Byte/packet counters indexed as bpc[counter set][direction][protocol]. */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/*
 * Sum the byte counters of every protocol bucket for one counter set and
 * one direction.
 *
 * @counters:  counters to read; never modified.
 * @set:       counter set index; the caller must keep it below
 *             IFS_MAX_COUNTER_SETS (no range check is done here).
 * @direction: IFS_TX or IFS_RX.
 *
 * Iterates up to IFS_MAX_PROTOS rather than naming each protocol, so the
 * total stays complete if a protocol bucket is added to enum ifs_proto.
 */
static inline uint64_t dc_sum_bytes(const struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].bytes;

	return total;
}

/*
 * Sum the packet counters of every protocol bucket for one counter set and
 * one direction. Same parameters and constraints as dc_sum_bytes().
 */
static inline uint64_t dc_sum_packets(const struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].packets;

	return total;
}


/*
 * Generic tag-keyed node used as a base for rb_tree ops: the rb-tree linkage
 * plus the tag stored with it.
 * It is embedded as the first member (tn) of struct tag_stat,
 * struct tag_counter_set and struct tag_ref.
 */
struct tag_node {
	struct rb_node node;
	tag_t tag;
};

/*
 * Statistics kept for one tag: the tag_node (tree linkage + tag) followed
 * by the data counters collected for that tag.
 */
struct tag_stat {
	struct tag_node tn;
	struct data_counters counters;
	/*
	 * If this tag is acct_tag based, we need to count against the
	 * matching parent uid_tag.
	 * NOTE(review): presumably NULL when the tag has no acct_tag portion
	 * -- confirm against the code that creates tag_stat entries.
	 */
	struct data_counters *parent_counters;
};

/*
 * Per-interface state: interface name, whether it is active, byte/packet
 * totals, and the tree of per-tag statistics.
 */
struct iface_stat {
	struct list_head list;  /* in iface_stat_list */
	char *ifname;
	bool active;
	/* net_dev is only valid for active iface_stat */
	struct net_device *net_dev;

	/* One bytes/packets pair per direction (indexed by enum ifs_tx_rx). */
	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
	/* Totals broken down by counter set, direction and protocol. */
	struct data_counters totals_via_skb;
	/*
	 * We keep the last_known, because some devices reset their counters
	 * just before NETDEV_UP, while some will reset just before
	 * NETDEV_REGISTER (which is more normal).
	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
	 * its current dev stats smaller than what was previously known, we
	 * assume an UNREGISTER and just use the last_known.
	 */
	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
	/* last_known is usable when last_known_valid is true */
	bool last_known_valid;

	struct proc_dir_entry *proc_ptr;

	/*
	 * NOTE(review): presumably a tree of struct tag_stat keyed by tag and
	 * protected by tag_stat_list_lock -- confirm against the users.
	 */
	struct rb_root tag_stat_tree;
	spinlock_t tag_stat_list_lock;
};

/*
 * Work item that carries an iface_stat pointer.
 * This is needed to create proc_dir_entries from atomic context.
 */
struct iface_stat_work {
	struct work_struct iface_work;
	/* NOTE(review): presumably the iface_stat the work acts on -- confirm. */
	struct iface_stat *iface_entry;
};

/*
 * Track tag that this socket is transferring data for, and not necessarily
 * the uid that owns the socket.
 * This is the tag against which tag_stat.counters will be billed.
 * These structs need to be looked up by sock and pid: sock_node provides the
 * rb-tree linkage and list links the entry into a per-process list.
 */
struct sock_tag {
	struct rb_node sock_node;
	struct sock *sk;  /* Only used as a number, never dereferenced */
	/* The socket is needed for sockfd_put() */
	struct socket *socket;
	/* Used to associate with a given pid */
	struct list_head list;   /* in proc_qtu_data.sock_tag_list */
	pid_t pid;

	/* The tag currently applied to this socket. */
	tag_t tag;
};

/*
 * Event counters, each kept as an atomic64_t.
 * The first group counts successful control events; the match_* group
 * counts what happened inside the iptables match function.
 */
struct qtaguid_event_counts {
	/* Various successful events */
	atomic64_t sockets_tagged;
	atomic64_t sockets_untagged;
	atomic64_t counter_set_changes;
	atomic64_t delete_cmds;
	atomic64_t iface_events;  /* Number of NETDEV_* events handled */

	atomic64_t match_calls;   /* Number of times iptables called mt */
	/* Number of times iptables called mt from pre or post routing hooks */
	atomic64_t match_calls_prepost;
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
	 * Total skbs processed is sum(match_found*).
	 */
	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
	/* The connection tracker had or didn't have the sk. */
	atomic64_t match_found_sk_in_ct;
	atomic64_t match_found_no_sk_in_ct;
	/*
	 * No sk could be found. No apparent owner. Could happen with
	 * unsolicited traffic.
	 */
	atomic64_t match_no_sk;
	/*
	 * The file ptr in the sk_socket wasn't there.
	 * This might happen for traffic while the socket is being closed.
	 */
	atomic64_t match_no_sk_file;
};

/*
 * Records which counter set is active for the tag held in tn.
 * NOTE(review): active_set presumably indexes the first dimension of
 * data_counters.bpc (0 .. IFS_MAX_COUNTER_SETS - 1) -- confirm.
 */
struct tag_counter_set {
	struct tag_node tn;
	int active_set;
};

/*----------------------------------------------*/
/*
 * The qtu uid data is used to track resources that are created directly or
 * indirectly by processes (uid tracked).
 * It is shared by the processes with the same uid.
 * Some of the resources will be counted to prevent further rogue allocations,
 * some will need freeing once the owner process (uid) exits.
 */
struct uid_tag_data {
	struct rb_node node;
	uid_t uid;

	/*
	 * For the uid, how many accounting tags have been set.
	 */
	int num_active_tags;
	/* Track the number of proc_qtu_data that reference it */
	int num_pqd;
	/* NOTE(review): presumably a tree of struct tag_ref -- confirm. */
	struct rb_root tag_ref_tree;
	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
};

/* Reference-count entry for one tag: how many sockets currently carry it. */
struct tag_ref {
	struct tag_node tn;

	/*
	 * This tracks the number of active sockets that have a tag on them
	 * which matches this tag_ref.tn.tag.
	 * A tag ref can live on after the sockets are untagged.
	 * A tag ref can only be removed during a tag delete command.
	 */
	int num_sock_tags;
};

/*
 * Per-process (pid) tracking entry. It points at the uid_tag_data it
 * belongs to and heads the list of sock_tag entries to release when the
 * process goes away.
 */
struct proc_qtu_data {
	struct rb_node node;
	pid_t pid;

	/* The uid_tag_data this process is accounted under. */
	struct uid_tag_data *parent_tag_data;

	/* Tracks the sock_tags that need freeing upon this proc's death */
	struct list_head sock_tag_list;
	/* No spinlock_t sock_tag_list_lock; use the global one. */
};

/*----------------------------------------------*/
#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */