#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# A Python library to read and store procfs (/proc) information on Linux.
#
# Each information storage class in this file keeps the data it reads as close
# to its original form as reasonably possible. Translation is done only when
# requested, so that it is always possible to probe the original data.
import collections
import logging
import os
import re
import struct
import sys
class _NullHandler(logging.Handler):
  """A logging handler that discards every record handed to it."""

  def emit(self, record):
    """Does nothing with |record|."""


# Logger shared by this module.  Only the do-nothing handler above is attached
# here, so this module produces no log output by itself (presumably to keep
# the library silent until the application installs its own handler).
_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())
class ProcStat(object):
  """Reads and stores information in /proc/pid/stat.

  Only the PID, VSIZE and RSS fields are retained after parsing.  They are
  kept as the strings found in the file and converted to integers on access;
  the unparsed lines stay available through |raw|.
  """

  # One named group per space-separated field, in file order.  Fields that
  # proc(5) documents as signed (%d / %ld) accept a leading '-': a process
  # with a negative nice value or a real-time priority would otherwise fail
  # to match at all.  STATE accepts any letter because kernels report states
  # beyond R/S/D/Z/T/W (e.g. 'I' for idle, per proc(5)).  The pattern is not
  # anchored at the end, so extra trailing fields are tolerated.
  _PATTERN = re.compile(
      r'^'
      r'(?P<PID>-?[0-9]+) '
      r'\((?P<COMM>.+)\) '
      r'(?P<STATE>[A-Za-z]) '
      r'(?P<PPID>-?[0-9]+) '
      r'(?P<PGRP>-?[0-9]+) '
      r'(?P<SESSION>-?[0-9]+) '
      r'(?P<TTY_NR>-?[0-9]+) '
      r'(?P<TPGID>-?[0-9]+) '
      r'(?P<FLAGS>[0-9]+) '
      r'(?P<MINFIT>[0-9]+) '
      r'(?P<CMINFIT>[0-9]+) '
      r'(?P<MAJFIT>[0-9]+) '
      r'(?P<CMAJFIT>[0-9]+) '
      r'(?P<UTIME>[0-9]+) '
      r'(?P<STIME>[0-9]+) '
      r'(?P<CUTIME>-?[0-9]+) '
      r'(?P<CSTIME>-?[0-9]+) '
      r'(?P<PRIORITY>-?[0-9]+) '
      r'(?P<NICE>-?[0-9]+) '
      r'(?P<NUM_THREADS>-?[0-9]+) '
      r'(?P<ITREALVALUE>-?[0-9]+) '
      r'(?P<STARTTIME>[0-9]+) '
      r'(?P<VSIZE>[0-9]+) '
      r'(?P<RSS>-?[0-9]+) '
      r'(?P<RSSLIM>[0-9]+) '
      r'(?P<STARTCODE>[0-9]+) '
      r'(?P<ENDCODE>[0-9]+) '
      r'(?P<STARTSTACK>[0-9]+) '
      r'(?P<KSTKESP>[0-9]+) '
      r'(?P<KSTKEIP>[0-9]+) '
      r'(?P<SIGNAL>[0-9]+) '
      r'(?P<BLOCKED>[0-9]+) '
      r'(?P<SIGIGNORE>[0-9]+) '
      r'(?P<SIGCATCH>[0-9]+) '
      r'(?P<WCHAN>[0-9]+) '
      r'(?P<NSWAP>[0-9]+) '
      r'(?P<CNSWAP>[0-9]+) '
      r'(?P<EXIT_SIGNAL>-?[0-9]+) '
      r'(?P<PROCESSOR>-?[0-9]+) '
      r'(?P<RT_PRIORITY>[0-9]+) '
      r'(?P<POLICY>[0-9]+) '
      r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
      r'(?P<GUEST_TIME>[0-9]+) '
      r'(?P<CGUEST_TIME>-?[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, pid, vsize, rss):
    """Stores already-extracted fields.

    Args:
      raw: The list of lines read from the stat file.
      pid: The PID field (string or integer).
      vsize: The VSIZE field (string or integer).
      rss: The RSS field (string or integer).
    """
    self._raw = raw
    self._pid = pid
    self._vsize = vsize
    self._rss = rss

  @staticmethod
  def load_file(stat_f):
    """Parses an open stat file.

    Args:
      stat_f: A file-like object providing readlines().

    Returns:
      A ProcStat built from the first line of |stat_f|.

    Raises:
      SyntaxError: If the file is empty or its first line does not have the
          expected layout.
    """
    raw = stat_f.readlines()
    matched = ProcStat._PATTERN.match(raw[0]) if raw else None
    if not matched:
      raise SyntaxError('Unknown /proc/pid/stat format.')
    fields = matched.groupdict()
    return ProcStat(raw, fields['PID'], fields['VSIZE'], fields['RSS'])

  @staticmethod
  def load(pid):
    """Reads and parses /proc/<pid>/stat."""
    with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
      return ProcStat.load_file(stat_f)

  @property
  def raw(self):
    """The lines of the stat file exactly as they were read."""
    return self._raw

  @property
  def pid(self):
    """The PID field as an integer."""
    return int(self._pid)

  @property
  def vsize(self):
    """The VSIZE field as an integer."""
    return int(self._vsize)

  @property
  def rss(self):
    """The RSS field as an integer."""
    return int(self._rss)
class ProcStatm(object):
  """Reads and stores information in /proc/pid/statm.

  The seven fields are kept as found in the file and converted to integers
  on access; the unparsed lines stay available through |raw|.
  """

  # Seven space-separated unsigned integers, in file order.
  _PATTERN = re.compile(
      r'^'
      r'(?P<SIZE>[0-9]+) '
      r'(?P<RESIDENT>[0-9]+) '
      r'(?P<SHARE>[0-9]+) '
      r'(?P<TEXT>[0-9]+) '
      r'(?P<LIB>[0-9]+) '
      r'(?P<DATA>[0-9]+) '
      r'(?P<DT>[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, size, resident, share, text, lib, data, dt):
    """Stores already-extracted fields.

    Args:
      raw: The list of lines read from the statm file.
      size, resident, share, text, lib, data, dt: The seven statm fields, in
          file order (strings or integers).
    """
    self._raw = raw
    self._size = size
    self._resident = resident
    self._share = share
    self._text = text
    self._lib = lib
    self._data = data
    self._dt = dt

  @staticmethod
  def load_file(statm_f):
    """Parses an open statm file.

    Args:
      statm_f: A file-like object providing readlines().

    Returns:
      A ProcStatm built from the first line of |statm_f|.

    Raises:
      SyntaxError: If the file is empty or its first line does not have the
          expected layout.
    """
    raw = statm_f.readlines()
    matched = ProcStatm._PATTERN.match(raw[0]) if raw else None
    if not matched:
      raise SyntaxError('Unknown /proc/pid/statm format.')
    # Build the group dictionary once instead of once per field.
    fields = matched.groupdict()
    return ProcStatm(raw,
                     fields['SIZE'],
                     fields['RESIDENT'],
                     fields['SHARE'],
                     fields['TEXT'],
                     fields['LIB'],
                     fields['DATA'],
                     fields['DT'])

  @staticmethod
  def load(pid):
    """Reads and parses /proc/<pid>/statm."""
    with open(os.path.join('/proc', str(pid), 'statm'), 'r') as statm_f:
      return ProcStatm.load_file(statm_f)

  @property
  def raw(self):
    """The lines of the statm file exactly as they were read."""
    return self._raw

  @property
  def size(self):
    """The SIZE (first) field as an integer."""
    return int(self._size)

  @property
  def resident(self):
    """The RESIDENT (second) field as an integer."""
    return int(self._resident)

  @property
  def share(self):
    """The SHARE (third) field as an integer."""
    return int(self._share)

  @property
  def text(self):
    """The TEXT (fourth) field as an integer."""
    return int(self._text)

  @property
  def lib(self):
    """The LIB (fifth) field as an integer."""
    return int(self._lib)

  @property
  def data(self):
    """The DATA (sixth) field as an integer."""
    return int(self._data)

  @property
  def dt(self):
    """The DT (seventh) field as an integer."""
    return int(self._dt)
class ProcStatus(object):
  """Reads and stores information in /proc/pid/status.

  Values are kept as the strings found in the file (e.g. '1234 kB') and are
  converted only when one of the accessors is used.
  """

  # A 'Name:<whitespace>value' line.
  _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  def __init__(self, raw, dct):
    """Stores selected fields from a parsed status file.

    Args:
      raw: The list of lines read from the status file.
      dct: A dict mapping field names to their string values.  Fields that
          are absent from |dct| are stored as None.
    """
    self._raw = raw
    self._pid = dct.get('Pid')
    self._name = dct.get('Name')
    self._vm_peak = dct.get('VmPeak')
    self._vm_size = dct.get('VmSize')
    self._vm_lck = dct.get('VmLck')
    self._vm_pin = dct.get('VmPin')
    self._vm_hwm = dct.get('VmHWM')
    self._vm_rss = dct.get('VmRSS')
    self._vm_data = dct.get('VmData')
    self._vm_stack = dct.get('VmStk')
    self._vm_exe = dct.get('VmExe')
    self._vm_lib = dct.get('VmLib')
    self._vm_pte = dct.get('VmPTE')
    self._vm_swap = dct.get('VmSwap')

  @staticmethod
  def load_file(status_f):
    """Parses an open status file.

    Args:
      status_f: A file-like object providing readlines().

    Returns:
      A ProcStatus holding the parsed fields.

    Raises:
      SyntaxError: If any line is not of the form 'Name: value'.
    """
    raw = status_f.readlines()
    dct = {}
    for line in raw:
      status_match = ProcStatus._PATTERN.match(line)
      if not status_match:
        raise SyntaxError('Unknown /proc/pid/status format.')
      dct[status_match.group('NAME')] = status_match.group('VALUE')
    return ProcStatus(raw, dct)

  @staticmethod
  def load(pid):
    """Reads and parses /proc/<pid>/status."""
    with open(os.path.join('/proc', str(pid), 'status'), 'r') as status_f:
      return ProcStatus.load_file(status_f)

  @staticmethod
  def _kilobytes(value, label):
    """Converts a '<number> kB' status value into an integer.

    Args:
      value: The raw string value, or None when the field was absent.
      label: The field name, used in error messages.

    Returns:
      The number of kilo-bytes as an integer.

    Raises:
      ValueError: If the field is missing or is not expressed in kB.
    """
    if value is None:
      # The field was not present in the file at all.
      raise ValueError('%s is not available.' % label)
    if value.endswith('kB'):
      return int(value.split()[0])
    raise ValueError('%s is not in kB.' % label)

  @property
  def raw(self):
    """The lines of the status file exactly as they were read."""
    return self._raw

  @property
  def pid(self):
    """The Pid field as an integer."""
    return int(self._pid)

  @property
  def vm_peak(self):
    """Returns a high-water (peak) virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_peak, 'VmPeak')

  @property
  def vm_size(self):
    """Returns a virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_size, 'VmSize')

  @property
  def vm_hwm(self):
    """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_hwm, 'VmHWM')

  @property
  def vm_rss(self):
    """Returns a resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_rss, 'VmRSS')
class ProcMapsEntry(object):
  """A class representing one line in /proc/pid/maps.

  Every constructor argument is stored unchanged in a public attribute of the
  same name.
  """

  # Attribute names, in constructor-argument order; also the as_dict() keys.
  _FIELD_NAMES = (
      'begin', 'end', 'readable', 'writable', 'executable', 'private',
      'offset', 'major', 'minor', 'inode', 'name')

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    self.begin, self.end = begin, end
    self.readable, self.writable = readable, writable
    self.executable, self.private = executable, private
    self.offset = offset
    self.major, self.minor = major, minor
    self.inode = inode
    self.name = name

  def as_dict(self):
    """Returns a new dict mapping each attribute name to its current value."""
    return dict(
        (field, getattr(self, field)) for field in self._FIELD_NAMES)
class ProcMaps(object):
  """Reads and stores information in /proc/pid/maps.

  Entries are indexed by their begin address and are iterated in ascending
  begin-address order.
  """

  # Groups: begin, end, four one-character permission flags, offset,
  # major:minor device, inode and the (possibly empty) name.
  MAPS_PATTERN = re.compile(
      r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
      r'(\d+)\s*(.*)$', re.IGNORECASE)

  # Name test shared by constants(), executable() and
  # executable_and_constants().  Compiled once, and written as a raw string so
  # that the backslashes are not treated as (invalid) string escapes.
  _MODULE_NAME_PATTERN = re.compile(
      r'\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?')

  def __init__(self):
    self._sorted_indexes = []
    self._dictionary = {}
    self._sorted = True

  def _ensure_sorted(self):
    """Sorts the begin-address index if entries arrived out of order."""
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True

  def iter(self, condition):
    """Yields entries in ascending begin-address order.

    Args:
      condition: A callable taking an entry; only entries for which it
          returns a true value are yielded.  A false value (e.g. None)
          disables filtering.
    """
    self._ensure_sorted()
    for index in self._sorted_indexes:
      entry = self._dictionary[index]
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    """Yields every entry in ascending begin-address order."""
    return self.iter(None)

  @staticmethod
  def load_file(maps_f):
    """Builds a ProcMaps from an iterable of maps lines.

    Lines that do not match MAPS_PATTERN are skipped.
    """
    table = ProcMaps()
    for line in maps_f:
      table.append_line(line)
    return table

  @staticmethod
  def load(pid):
    """Reads and parses /proc/<pid>/maps."""
    with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
      return ProcMaps.load_file(maps_f)

  def append_line(self, line):
    """Parses |line| and stores the resulting entry.

    Returns:
      The stored entry, or None if |line| is not a maps line.
    """
    entry = self.parse_line(line)
    if entry:
      self._append_entry(entry)
    return entry

  @staticmethod
  def parse_line(line):
    """Converts one maps line into a ProcMapsEntry.

    Returns:
      A ProcMapsEntry with begin, end and offset parsed as hexadecimal
      integers and inode as a decimal integer, or None if |line| does not
      match MAPS_PATTERN.
    """
    matched = ProcMaps.MAPS_PATTERN.match(line)
    if not matched:
      return None
    return ProcMapsEntry(
        int(matched.group(1), 16),  # begin
        int(matched.group(2), 16),  # end
        matched.group(3),           # readable
        matched.group(4),           # writable
        matched.group(5),           # executable
        matched.group(6),           # private
        int(matched.group(7), 16),  # offset
        matched.group(8),           # major
        matched.group(9),           # minor
        int(matched.group(10), 10), # inode
        matched.group(11)           # name
        )

  @staticmethod
  def constants(entry):
    """Tests for a non-writable, non-executable mapping of a module file.

    Returns:
      A true value (the regular expression match) if |entry| qualifies,
      otherwise a false value.
    """
    return (entry.writable == '-' and entry.executable == '-' and
            ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

  @staticmethod
  def executable(entry):
    """Tests for an executable mapping of a module file.

    Returns:
      A true value (the regular expression match) if |entry| qualifies,
      otherwise a false value.
    """
    return (entry.executable == 'x' and
            ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

  @staticmethod
  def executable_and_constants(entry):
    """Tests for a mapping accepted by executable() or constants().

    Returns:
      A true value (the regular expression match) if |entry| qualifies,
      otherwise a false value.
    """
    return (((entry.writable == '-' and entry.executable == '-') or
             entry.executable == 'x') and
            ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

  def _append_entry(self, entry):
    """Stores |entry| under its begin address.

    An entry whose begin address is already known replaces the stored one.
    The address is indexed only once, so iteration never yields the same
    entry twice.
    """
    if entry.begin not in self._dictionary:
      if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
        self._sorted = False
      self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry
class ProcSmaps(object):
  """Reads and stores information in /proc/pid/smaps.

  The file is a sequence of mapping header lines (same format as
  /proc/pid/maps), each followed by 'Name: value' attribute lines.  Numeric
  attributes are summed over all mappings; per-mapping values are kept in
  VMA objects.
  """

  # A 'Name:<whitespace>value' attribute line.
  _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  class VMA(object):
    """Selected attributes of a single mapping, stored as read."""

    # smaps attribute name -> instance attribute that stores its raw value.
    _ATTRIBUTE_NAMES = {
        'Size': '_size',
        'Rss': '_rss',
        'Pss': '_pss',
        'Referenced': '_referenced',
        'Private_Clean': '_private_clean',
        'Shared_Clean': '_shared_clean',
        'KernelPageSize': '_kernel_page_size',
        'MMUPageSize': '_mmu_page_size',
    }

    def __init__(self):
      # Integer zero until the corresponding line is appended.
      self._size = 0
      self._rss = 0
      self._pss = 0

    def append(self, name, value):
      """Records |value| if |name| is a tracked attribute; else ignores it."""
      attribute = self._ATTRIBUTE_NAMES.get(name)
      if attribute:
        setattr(self, attribute, value)

    @staticmethod
    def _to_int(value):
      """Converts a stored value to an integer.

      |value| is either a raw string such as '132 kB' or '132', or the
      integer 0 that __init__ stores before anything has been appended.
      """
      if hasattr(value, 'endswith') and value.endswith('kB'):
        return int(value.split()[0])
      return int(value)

    @property
    def size(self):
      """The Size attribute as an integer (0 if never appended)."""
      return self._to_int(self._size)

    @property
    def rss(self):
      """The Rss attribute as an integer (0 if never appended)."""
      return self._to_int(self._rss)

    @property
    def pss(self):
      """The Pss attribute as an integer (0 if never appended)."""
      return self._to_int(self._pss)

  def __init__(self, raw, total_dct, maps, vma_internals):
    """Stores parsed smaps data.

    Args:
      raw: The list of lines read from the smaps file.
      total_dct: A mapping from attribute name to its total over all
          mappings.  It must provide 'Size', 'Rss', 'Pss', 'Referenced',
          'Shared_Clean', 'Private_Clean', 'KernelPageSize' and
          'MMUPageSize' (load_file() passes a defaultdict, so missing names
          read as 0).
      maps: The ProcMaps built from the mapping header lines.
      vma_internals: An ordered mapping from mapping entry to ProcSmaps.VMA.
    """
    self._raw = raw
    self._size = total_dct['Size']
    self._rss = total_dct['Rss']
    self._pss = total_dct['Pss']
    self._referenced = total_dct['Referenced']
    self._shared_clean = total_dct['Shared_Clean']
    self._private_clean = total_dct['Private_Clean']
    self._kernel_page_size = total_dct['KernelPageSize']
    self._mmu_page_size = total_dct['MMUPageSize']
    self._maps = maps
    self._vma_internals = vma_internals

  @staticmethod
  def load_file(smaps_f):
    """Parses an open smaps file.

    Args:
      smaps_f: A file-like object providing readlines().

    Returns:
      A ProcSmaps holding per-mapping values and totals.
    """
    raw = smaps_f.readlines()
    vma = None
    vma_internals = collections.OrderedDict()
    total_dct = collections.defaultdict(int)
    maps = ProcMaps()
    for line in raw:
      if ProcMaps.MAPS_PATTERN.match(line):
        vma = maps.append_line(line.strip())
        vma_internals[vma] = ProcSmaps.VMA()
        continue
      smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
      if not smaps_match or vma is None:
        # Not an attribute line, or an attribute with no mapping to own it.
        continue
      name = smaps_match.group('NAME')
      value = smaps_match.group('VALUE')
      vma_internals[vma].append(name, value)
      # Only numeric attributes can be totalled; lines such as
      # 'VmFlags: rd ex mr' carry flag names instead of a number.
      tokens = value.split()
      if tokens and tokens[0].isdigit():
        total_dct[name] += int(tokens[0])
    return ProcSmaps(raw, total_dct, maps, vma_internals)

  @staticmethod
  def load(pid):
    """Reads and parses /proc/<pid>/smaps."""
    with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
      return ProcSmaps.load_file(smaps_f)

  @property
  def size(self):
    """Total of the Size attributes, in the unit used by the file."""
    return self._size

  @property
  def rss(self):
    """Total of the Rss attributes, in the unit used by the file."""
    return self._rss

  @property
  def referenced(self):
    """Total of the Referenced attributes."""
    return self._referenced

  @property
  def pss(self):
    """Total of the Pss attributes."""
    return self._pss

  @property
  def private_clean(self):
    """Total of the Private_Clean attributes."""
    return self._private_clean

  @property
  def shared_clean(self):
    """Total of the Shared_Clean attributes."""
    return self._shared_clean

  @property
  def kernel_page_size(self):
    """Total of the KernelPageSize attributes."""
    return self._kernel_page_size

  @property
  def mmu_page_size(self):
    """Total of the MMUPageSize attributes."""
    return self._mmu_page_size

  @property
  def vma_internals(self):
    """The ordered mapping from mapping entry to ProcSmaps.VMA."""
    return self._vma_internals
class ProcPagemap(object):
  """Reads and stores partial information in /proc/pid/pagemap.

  It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
  See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
  """
  _BYTES_PER_PAGEMAP_VALUE = 8
  _BYTES_PER_OS_PAGE = 4096
  # Dividing a virtual address by this gives its byte offset in the pagemap
  # file.  Floor division keeps the result an integer on every Python
  # version, which os.lseek() requires.
  _VIRTUAL_TO_PAGEMAP_OFFSET = _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE

  # Bit masks applied to each 64-bit pagemap value.
  _MASK_PRESENT = 1 << 63
  _MASK_SWAPPED = 1 << 62
  _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
  _MASK_SOFTDIRTY = 1 << 55
  _MASK_PFN = (1 << 55) - 1

  class VMA(object):
    """Byte counts and page frames gathered for a single mapping."""

    def __init__(self, vsize, present, swapped, pageframes):
      self._vsize = vsize
      self._present = present
      self._swapped = swapped
      self._pageframes = pageframes

    @property
    def vsize(self):
      """Bytes covered by the pagemap values read for this mapping."""
      return int(self._vsize)

    @property
    def present(self):
      """Bytes of distinct present page frames in this mapping."""
      return int(self._present)

    @property
    def swapped(self):
      """Bytes of pages flagged as swapped in this mapping."""
      return int(self._swapped)

    @property
    def pageframes(self):
      """A mapping from page frame number to its number of occurrences."""
      return self._pageframes

  def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
    """Stores totals over all mappings.

    Args:
      vsize: Total bytes covered by the pagemap values that were read.
      present: Sum of the per-mapping present byte counts.
      swapped: Sum of the per-mapping swapped byte counts.
      vma_internals: An ordered mapping from mapping entry to
          ProcPagemap.VMA.
      in_process_dup: Bytes of present pages whose page frame had already
          been seen earlier in the same process.
    """
    self._vsize = vsize
    self._present = present
    self._swapped = swapped
    self._vma_internals = vma_internals
    self._in_process_dup = in_process_dup

  @staticmethod
  def load(pid, maps):
    """Reads /proc/<pid>/pagemap for every mapping in |maps|.

    Args:
      pid: The process ID.
      maps: An iterable of mapping entries having |begin| and |end|
          virtual addresses (e.g. a ProcMaps).

    Returns:
      A ProcPagemap holding per-mapping and total counts.
    """
    total_present = 0
    total_swapped = 0
    total_vsize = 0
    in_process_dup = 0
    vma_internals = collections.OrderedDict()
    process_pageframe_set = set()
    page_size = ProcPagemap._BYTES_PER_OS_PAGE
    value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE

    pagemap_fd = os.open(
        os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
    # The descriptor must be released even if reading or unpacking fails.
    try:
      for vma in maps:
        present = 0
        swapped = 0
        vsize = 0
        pageframes = collections.defaultdict(int)
        begin_offset = ProcPagemap._offset(vma.begin)
        chunk_size = ProcPagemap._offset(vma.end) - begin_offset
        os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
        buf = os.read(pagemap_fd, chunk_size)
        if len(buf) < chunk_size:
          _LOGGER.warning(
              'Failed to read pagemap at 0x%x in %d.', vma.begin, pid)
          # Drop a trailing partial value so that the rest can be unpacked.
          buf = buf[:len(buf) - len(buf) % value_size]
        pagemap_values = struct.unpack(
            '=%dQ' % (len(buf) // value_size), buf)
        for pagemap_value in pagemap_values:
          vsize += page_size
          if pagemap_value & ProcPagemap._MASK_PRESENT:
            pageframe = pagemap_value & ProcPagemap._MASK_PFN
            if pageframe in process_pageframe_set:
              in_process_dup += page_size
            else:
              process_pageframe_set.add(pageframe)
            # A frame mapped several times in one mapping counts once.
            if pageframe not in pageframes:
              present += page_size
            pageframes[pageframe] += 1
          if pagemap_value & ProcPagemap._MASK_SWAPPED:
            swapped += page_size
        vma_internals[vma] = ProcPagemap.VMA(
            vsize, present, swapped, pageframes)
        total_present += present
        total_swapped += swapped
        total_vsize += vsize
    finally:
      os.close(pagemap_fd)

    return ProcPagemap(total_vsize, total_present, total_swapped,
                       vma_internals, in_process_dup)

  @staticmethod
  def _offset(virtual_address):
    """Returns the pagemap file offset in bytes for |virtual_address|."""
    return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

  @property
  def vsize(self):
    """Total bytes covered by the pagemap values that were read."""
    return int(self._vsize)

  @property
  def present(self):
    """Sum of the per-mapping present byte counts."""
    return int(self._present)

  @property
  def swapped(self):
    """Sum of the per-mapping swapped byte counts."""
    return int(self._swapped)

  @property
  def in_process_dup(self):
    """Bytes of present pages whose frame was seen earlier in the process."""
    return int(self._in_process_dup)

  @property
  def vma_internals(self):
    """The ordered mapping from mapping entry to ProcPagemap.VMA."""
    return self._vma_internals
class _ProcessMemory(object):
  """Aggregates process memory information from /proc for manual testing.

  Nothing is read at construction time; each read_*() method loads one /proc
  file and stores the result for the property of the same name.
  """

  def __init__(self, pid):
    self._pid = pid
    self._smaps = []
    self._maps = self._pagemap = None
    self._stat = self._statm = self._status = None

  def _read(self, proc_file):
    """Returns the lines of /proc/<pid>/<proc_file> as a list."""
    path = os.path.join('/proc', str(self._pid), proc_file)
    with open(path, 'r') as proc_f:
      return proc_f.readlines()

  def read_all(self):
    """Loads every supported file; pagemap comes last as it needs the maps."""
    readers = (self.read_stat, self.read_statm, self.read_status,
               self.read_smaps, self.read_maps)
    for reader in readers:
      reader()
    self.read_pagemap(self._maps)

  def read_maps(self):
    """Loads /proc/<pid>/maps."""
    self._maps = ProcMaps.load(self._pid)

  def read_pagemap(self, maps):
    """Loads /proc/<pid>/pagemap for the mappings listed in |maps|."""
    self._pagemap = ProcPagemap.load(self._pid, maps)

  def read_smaps(self):
    """Loads /proc/<pid>/smaps."""
    self._smaps = ProcSmaps.load(self._pid)

  def read_stat(self):
    """Loads /proc/<pid>/stat."""
    self._stat = ProcStat.load(self._pid)

  def read_statm(self):
    """Loads /proc/<pid>/statm."""
    self._statm = ProcStatm.load(self._pid)

  def read_status(self):
    """Loads /proc/<pid>/status."""
    self._status = ProcStatus.load(self._pid)

  @property
  def pid(self):
    """The process ID given to the constructor."""
    return self._pid

  @property
  def maps(self):
    """The result of the last read_maps(), or None."""
    return self._maps

  @property
  def pagemap(self):
    """The result of the last read_pagemap(), or None."""
    return self._pagemap

  @property
  def smaps(self):
    """The result of the last read_smaps(), or an empty list."""
    return self._smaps

  @property
  def stat(self):
    """The result of the last read_stat(), or None."""
    return self._stat

  @property
  def statm(self):
    """The result of the last read_statm(), or None."""
    return self._statm

  @property
  def status(self):
    """The result of the last read_status(), or None."""
    return self._status
def main(argv):
  """The main function for manual testing.

  Prints, for each process ID given on the command line, the virtual size and
  the resident size as reported by stat, statm, status, smaps and pagemap.

  Args:
    argv: The command line; every element after the first must be an integer
        process ID.

  Returns:
    0 on success.

  Raises:
    SyntaxError: If an argument is not an integer.
  """
  # Validate the command line first so that a bad argument fails before the
  # module logger is reconfigured.
  pids = []
  for arg in argv[1:]:
    try:
      pids.append(int(arg))
    except ValueError:
      raise SyntaxError("%s is not an integer." % arg)

  _LOGGER.setLevel(logging.WARNING)
  handler = logging.StreamHandler()
  handler.setLevel(logging.WARNING)
  handler.setFormatter(logging.Formatter(
      '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
  _LOGGER.addHandler(handler)

  # statm and stat.rss are multiplied by 4096, status and smaps by 1024, to
  # print every figure in bytes.
  # print is called with a single parenthesized argument so that the same
  # line is valid both as a Python 2 statement and a Python 3 function call.
  for pid in pids:
    proc = _ProcessMemory(pid)
    proc.read_all()

    print('=== PID: %d ===' % pid)

    print(' stat: %d' % proc.stat.vsize)
    print(' statm: %d' % (proc.statm.size * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_size * 1024,
                                     proc.status.vm_peak * 1024))
    print(' smaps: %d' % (proc.smaps.size * 1024))
    print('pagemap: %d' % proc.pagemap.vsize)

    print(' stat: %d' % (proc.stat.rss * 4096))
    print(' statm: %d' % (proc.statm.resident * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_rss * 1024,
                                     proc.status.vm_hwm * 1024))
    print(' smaps: %d' % (proc.smaps.rss * 1024))
    print('pagemap: %d' % proc.pagemap.present)

  return 0
# When executed as a script, run main() on the command line and use its
# return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))