ck: net: track the pid who created socks

to #32682624

This adds sk_pid to struct sock to store the pid of the process that
created the sock. A new inet_diag attribute type, INET_DIAG_PID, is also
added so that userspace programs can read it.

sk_pid is initialized when a process creates an AF_INET or AF_INET6
socket. To avoid any impact on performance, it is not updated when the
socket is read from or written to. This field stores the pid as seen in
the creating process's own pid namespace.
Userspace programs, such as ss (iproute2) and NX (nx tcp top), can
fetch this value with INET_DIAG_PID: the netlink payload then contains
the sock pid, so there is no need to iterate over /proc/${pid}/fd. Only
the pid that created the socket is recorded, but that should be enough
to help us trace TCP flows with very low overhead.

Be careful with the value of INET_DIAG_PID: upstream keeps introducing
new types, so we leave a hole in the enum for future extensions.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Acked-by: Dust Li <dust.li@linux.alibaba.com>
Signed-off-by: Qiao Ma <mqaio@linux.alibaba.com>
Acked-by: Tony Lu <tonylu@linux.alibaba.com>
This commit is contained in:
Tony Lu 2020-07-24 15:49:11 +08:00 committed by Qiao Ma
parent 00a9b94026
commit b578e4b8ed
5 changed files with 14 additions and 0 deletions

View File

@ -72,6 +72,7 @@ static inline size_t inet_diag_msg_attrs_size(void)
#endif
+ nla_total_size(4) /* INET_DIAG_MARK */
+ nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(4) /* INET_DIAG_PID */
#ifdef CONFIG_SOCK_CGROUP_DATA
+ nla_total_size_64bit(sizeof(u64)) /* INET_DIAG_CGROUP_ID */
#endif

View File

@ -344,6 +344,7 @@ struct bpf_local_storage;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @sk_toa_data: tcp option address (toa) data
* @sk_pid: pid of the process that created this sock
*/
struct sock {
/*
@ -525,6 +526,8 @@ struct sock {
#endif
struct rcu_head sk_rcu;
pid_t sk_pid;
CK_HOTFIX_RESERVE(1)
CK_HOTFIX_RESERVE(2)
};

View File

@ -161,6 +161,7 @@ enum {
INET_DIAG_SK_BPF_STORAGES,
INET_DIAG_CGROUP_ID,
INET_DIAG_SOCKOPT,
INET_DIAG_PID = 30, /* response attribute only for sk_pid */
__INET_DIAG_MAX,
};

View File

@ -351,6 +351,9 @@ lookup_protocol:
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
if (!kern)
sk->sk_pid = task_pid_vnr(current);
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;

View File

@ -359,6 +359,12 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
if ((ext & (1 << (INET_DIAG_PID - 1)) ||
ext & (1 << (INET_DIAG_INFO - 1))) &&
net_admin && sk->sk_pid)
if (nla_put_u32(skb, INET_DIAG_PID, sk->sk_pid))
goto errout;
/* Keep it at the end for potential retry with a larger skb,
* or else do best-effort fitting, which is only done for the
* first_nlmsg.