File: //usr/local/qcloud/monitor/barad/plugin/collector/utils/collect_tool.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'hetiulin'
import time
import os
import sys
import re
import traceback
import logging
sys.path.append(os.getcwd() + "/../../../comm")
import constant
import subprocess
import commands
import psutil
import urllib2
from cutils import console_logger, generate_config, is_metal, CommUtils
from base_process import BaseProcess
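# Singleton that reads feature switches from /etc/barad_agent_filter.ini and caches
# them so collectors can cheaply check whether an optional code path is enabled.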
class filter_fun(BaseProcess):
_instance = None
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(filter_fun, cls).__new__(cls, *args, **kwargs)
return cls._instance
def __init__(self):
# __init__ runs on every instantiation of the singleton; only set up state the
# first time so cached switch values are shared between collectors
if not hasattr(self, '_filter'):
self.file_path = '/etc/barad_agent_filter.ini'
self._filter = {}
def get_is_filter(self, label, value):
if not os.path.exists(self.file_path):
return 0
if label not in self._filter.keys():
self._filter.setdefault(label, -1)
if self._filter[label] == -1:
self._filter[label] = int(generate_config(self.file_path, label)[value])
return self._filter[label]
'''cpu info collect'''
class CpuCollect(BaseProcess):
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self._prev_data = {'cpudrops': -1, 'timestamp': 0}
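# /proc/loadavg: 1/5/15 minute load averages plus the "runnable/total" task counts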
def get_cpu_load(self):
cmd = "cat /proc/loadavg"
process = os.popen(cmd)
out = process.readline().strip().split(' ')
process.close()
cpu_load_1 = float(out[0])
cpu_load_5 = float(out[1])
cpu_load_15 = float(out[2])
cpu_queue_length = int(out[3].split('/')[0])
total_threads = int(out[3].split('/')[1])
return (cpu_load_1, cpu_load_5, cpu_load_15, cpu_queue_length, total_threads)
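# overall CPU usage in percent, with iowait subtracted from psutil's busy percentage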
def get_cpu_usage(self):
cpu_percent = psutil.cpu_percent()
iowait = psutil.cpu_times_percent().iowait
cpu_usage = cpu_percent - iowait
return cpu_usage
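# column 2 of /proc/net/softnet_stat is the per-CPU dropped-packet counter (hex);
# report the drop rate per second since the previous sample, or -1 on the first call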
def get_softirq_package_lost(self):
cmd = "cat /proc/net/softnet_stat | awk '{print $2}'"
process = os.popen(cmd)
out = process.read().strip().split('\n')
process.close()
cpudrops = sum([int(i, 16) for i in out])
curr_time = int(time.time())
if self._prev_data['cpudrops'] == -1 or curr_time <= self._prev_data['timestamp']:
# no usable previous sample yet (or the clock did not advance), report -1
softirq_package_lost = -1
else:
softirq_package_lost = float(cpudrops - self._prev_data['cpudrops']) / (curr_time - self._prev_data['timestamp'])
self._prev_data = {'cpudrops': cpudrops, 'timestamp': curr_time}
return softirq_package_lost
def test_cpu_collect():
obj = CpuCollect()
print ("cpu's usage: %d" % obj.get_cpu_usage())
print (obj.get_softirq_package_lost())
'''mem info collect'''
class MemCollect(BaseProcess):
flag=0
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self.filter_fun = filter_fun()
self.mem_used_filter_flag = self.get_new_mem_config()
self._prev_data = {
'sin': 0,
'sout': 0,
'time': 0
}
def get_new_mem_config(self):
filter = 0
try:
filter = self.filter_fun.get_is_filter('mem_filter', 'new_caculate_mem')
except Exception as e:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return filter
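# memory usage in MB: /proc/meminfo values are in kB (>> 10 to MB) while psutil
# reports bytes (>> 20 to MB); the "new" method derives used memory from MemAvailable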
def get_mem_info(self):
file_path='/proc/meminfo'
memFree=0
memtotal=0
memavailable=0
cached=0
buffers=0
valid_flag=0
if os.path.exists(file_path):
try:
with open(file_path, "r") as file:
for line in file :
if line.startswith('MemFree'):
memFree = int(line.strip().split()[1])
if line.startswith('MemTotal'):
memtotal = int(line.strip().split()[1])
if line.startswith('MemAvailable'):
memavailable = float(line.strip().split()[1])
if line.startswith('Cached'):
cached = int(line.strip().split()[1])
if line.startswith('Buffers'):
buffers = int(line.strip().split()[1])
if memtotal != 0 and memavailable != 0:
valid_flag=1
except Exception:
# log the parse failure only once, using the class-level flag
if MemCollect.flag == 0:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
MemCollect.flag = 1
valid_flag=0
else:
# log the missing /proc/meminfo only once
if MemCollect.flag == 0:
self.logger().error("os does not contain /proc/meminfo")
MemCollect.flag = 1
valid_flag=0
info = psutil.virtual_memory()
# if mem_used_filter_flag == 1, use the new calculation method based on /proc/meminfo;
# by default use the old psutil-based calculation method
if self.mem_used_filter_flag == 1:
if valid_flag == 1:
return (int(memtotal - memavailable) >> 10, memtotal >> 10, ((memtotal - memavailable)/memtotal)*100, (memtotal - memFree) >> 10, cached >> 10, buffers >> 10)
else:
return ((info.total - info.available) >> 20, info.total >> 20, info.percent, (info.total - info.free) >> 20, info.cached >> 20, info.buffers >> 20)
else:
if valid_flag == 1:
return ((info.total - info.available) >> 20, info.total >> 20, ((memtotal - memavailable)/memtotal)*100, (info.total >> 20) - (memFree >> 10), info.cached >> 20, info.buffers >> 20)
else:
return ((info.total - info.available) >> 20, info.total >> 20, info.percent, (info.total >> 20) - (memFree >> 10), info.cached >> 20, info.buffers >> 20)
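# swap usage in MB plus swap-in/swap-out rates since the previous sample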
def get_swap_info(self):
info = psutil.swap_memory()
curr_time = int(time.time())
swin = 0
swout = 0
if self._prev_data['time'] > 0:
# elapsed time is current minus previous, and must be positive
timediff = curr_time - self._prev_data['time']
if timediff > 0:
swin = (info.sin - self._prev_data['sin'])/timediff
swout = (info.sout - self._prev_data['sout'])/timediff
self._prev_data['sin'] = info.sin
self._prev_data['sout'] = info.sout
self._prev_data['time'] = curr_time
return (info.used >> 20, info.total >> 20, swin, swout)
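# total virtual and private memory (MB) summed over the memory maps of all processes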
def get_all_process_mem(self):
total_virtual_size = 0
total_private_size = 0
for pid in psutil.pids():
try:
process = psutil.Process(pid)
for info in process.memory_maps():
total_virtual_size += info.size
total_private_size += info.private_clean + info.private_dirty
except psutil.NoSuchProcess:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return (total_virtual_size >> 20, total_private_size >> 20)
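# HardwareCorrupted from /proc/meminfo: 1 if corrupted pages exist, 0 if none, -1 on failure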
def get_memory_status(self):
cmd = "cat /proc/meminfo |grep 'HardwareCorrupted' |awk '{print $2}'"
output = CommUtils.ExecuteTimeoutCommand(cmd, 3)
if len(output) == 0:
return -1
elif int(output) > 0:
return 1
else:
return 0
def get_specific_field_sum(self, vmstat_path, startStr):
# sum the value column of every /proc/vmstat line whose name starts with startStr
total = 0
try:
for line in open(vmstat_path, "r"):
if line.startswith(startStr):
total = total + int(line.split()[-1].strip())
except Exception:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return total
def test_mem_collect():
mem_collect = MemCollect()
print mem_collect.get_mem_info()
print mem_collect.get_swap_info()
print mem_collect.get_all_process_mem()
print mem_collect.get_specific_field_sum("/proc/vmstat","pgscan_")
class DiskCollect_tool(BaseProcess):
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self.retry_time = [60, 600, 1800, 3600]
self.req_fail_count = -1
self.req_last_fail_time = 0
self.disk_list_id = []
self.diskid1 = ""
self.metadata = ''
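# Resolve the disk id that has no local /sys/block/*/serial entry by diffing the
# volume list returned by the metadata service against the locally discovered
# serials; on failure, back off according to retry_time before querying again.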
def get_disk_list(self):
if self.diskid1 != '':
return self.diskid1
if self.req_last_fail_time != 0 and (int(time.time()) - self.req_last_fail_time < self.retry_time[self.req_fail_count]):
return self.diskid1
if self.metadata == '':
self.metadata = generate_config(constant.PLUGIN_CONFIG_PATH, "MetaDataServer")
partitions = psutil.disk_partitions()
for item in partitions:
if item.opts.startswith('rw'):
# added by hetiulin, 2015/01/29
# trace the real path of a partition
if os.path.islink(item.device):
device = os.readlink(item.device)
else:
device = item.device
# end
disk_list = device[device.rfind('/')+1:]
vd_name = re.search("[a-z]+", disk_list)
if vd_name:
vd_name = vd_name.group()
else:
vd_name = disk_list
if os.path.exists('/sys/block/%s/serial' % vd_name):
process = open('/sys/block/%s/serial' % vd_name)
diskid = process.read().strip().split("\n")[0]
process.close()
else:
diskid = ""
self.disk_list_id.append(diskid)
url = self.metadata['metadata_url'] + 'volumes'
try:
response = urllib2.urlopen(url, timeout=2)
content = response.read().replace('/', '').splitlines()
except Exception as e:
self.logger().error("get disk_id from metadata fail, %s " % e)
# fall back to the locally discovered serials; the common path below records
# the failure time and bumps the retry counter exactly once
content = self.disk_list_id
if len(list(set(content) - set(self.disk_list_id))) == 1:
self.diskid1 = list(set(content) - set(self.disk_list_id))[0]
else:
self.diskid1 = ""
self.req_last_fail_time = int(time.time())
if self.req_fail_count < len(self.retry_time) - 1:
self.req_fail_count += 1
#print diskid
return self.diskid1
def get_diskid_use_diskname(self,diskname):
diskid = ''
try :
vd_name = re.search("[a-z]+", diskname)
if vd_name:
vd_name = vd_name.group()
else:
vd_name = diskname
if os.path.exists('/sys/block/%s/serial' % vd_name):
process = open('/sys/block/%s/serial' % vd_name)
diskid = process.read().strip().split("\n")[0]
process.close()
else:
diskid = ""
if diskid == "" and re.match("vd", diskname):
diskid = self.get_disk_list()
#diskid = '123456'
if re.search(r'\d', diskname) and diskid != "":
diskid = diskid + '-' + diskname
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return diskid
# Modified by hetiulin, 2014-12-26, implementing with psutil
'''get disk info'''
class DiskCollect(BaseProcess):
disk_ro_file = '/run/.DISKRO'
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self._prev_diskio = psutil.disk_io_counters(perdisk=True)
self._lastTime = time.time()
self.disk_tool = DiskCollect_tool()
self.filter_fun = filter_fun()
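# Per-nvme-disk deltas since the previous sample: throughput in KB/s, IOPS,
# await (ms per request), time per I/O and utilisation percentage.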
def __get_disk_io(self):
disk_io = {}
disk_ro_file = {}
curr_diskio = psutil.disk_io_counters(perdisk=True)
currTime = time.time()
for name, curr in curr_diskio.items():
if re.match('nvme', name) is None:
continue
read_io = 0.0
write_io = 0.0
read_iops = 0.0
write_iops = 0.0
await = 0.0
tmpio = 0.0
ioutil = 0.0
if self._prev_diskio.has_key(name):
prev = self._prev_diskio[name]
read_time = float(curr.read_time - prev.read_time)
write_time = float(curr.write_time - prev.write_time)
read_bytes = curr.read_bytes - prev.read_bytes
write_bytes = curr.write_bytes - prev.write_bytes
read_count = curr.read_count - prev.read_count
write_count = curr.write_count - prev.write_count
# in ms
busy_time = float(curr.busy_time - prev.busy_time)
rw_count = (curr.read_count - prev.read_count) + (curr.write_count - prev.write_count)
timediff = currTime - self._lastTime
if timediff > 0:
# in KB
read_io = float('%0.2f' % (float(read_bytes/1024 / timediff)))
write_io = float('%0.2f' % (float(write_bytes/1024 / timediff)))
read_iops = float('%0.2f' % (float(read_count) / timediff ))
write_iops = float('%0.2f' % (float(write_count) / timediff))
# percentage of the interval spent doing I/O
ioutil = 100*float('%0.4f' % (busy_time / timediff /1000))
if rw_count > 0:
await = float('%0.2f' % ((read_time + write_time) / rw_count))
# time spent per I/O, in ms
tmpio = float('%0.2f' % (busy_time / rw_count))
disk_io[name] = {'disk_read_iops':read_iops,'disk_write_iops': write_iops,'diskReadtraff': read_io, 'diskWritetraff': write_io,'diskAwait': await,'disk_tmpio':tmpio, 'disk_ioutil':ioutil if ioutil<100 else 100}
self._prev_diskio = curr_diskio
self._lastTime = currTime
return disk_io
# disk info
def get_disk_info(self):
cmd = "lsblk -l | awk '{if($6 == \"disk\" && match($1,\"nvme\")){print $1}}'"
output = CommUtils.ExecuteTimeoutCommand(cmd, 3).splitlines()
if len(output) == 0:
return -1
disk_io = self.__get_disk_io()
disk_info = {}
for disk_t in output:
# guard against devices psutil did not report I/O counters for
if disk_t not in disk_io:
continue
disk_info[disk_t] = {
'diskReadtraff': disk_io[disk_t]['diskReadtraff'],
'diskWritetraff': disk_io[disk_t]['diskWritetraff'],
'disk_readiops':disk_io[disk_t]['disk_read_iops'],
'disk_writeiops':disk_io[disk_t]['disk_write_iops'],
'disk_tmpio':disk_io[disk_t]['disk_tmpio'],
'disk_ioutil':disk_io[disk_t]['disk_ioutil'],
'diskAwait': disk_io[disk_t]['diskAwait']
}
return disk_info
def get_ro_is_filter(self):
filter = 0
try:
filter = self.filter_fun.get_is_filter('rodev_filter', 'rodev_filter_fun')
except Exception as e:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return filter
'''disk partition info'''
def get_partition_info(self):
def __get_without_fstype(without_fstype):
mount_info = []
try:
with open("/etc/mtab", 'r') as file:
for line in file:
# skip comments and blank lines
if line.startswith('#') or not line.strip():
continue
parts = line.split()
if len(parts) >= 6:
device, mount_point, fstype, _, _, _ = parts[:6]
if fstype not in without_fstype:
continue
mount_info.append({
'device': device,
'mount_point': mount_point,
'fstype': fstype
})
else:
self.logger().error("Warning: Skipping malformed line")
return mount_info
except IOError:
self.logger().info("Error: The file /etc/mtab does not exist.")
return []
except Exception as e:
self.logger().error("An error occurred:",str(e))
return []
def __check_ro(root_path):
ret = False
if not os.path.isdir(root_path):
return ret
tmpfile = root_path + "/WTEST.TMP"
buffer = '0'*4096
try:
with open(tmpfile, 'w') as fd:
fd.write(buffer)
fd.flush()
except IOError:
ret = True
return ret
try:
os.remove(tmpfile)
except Exception:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return ret
disk_part = {}
partitions = psutil.disk_partitions()
without_fstype = ['lustre','nfs','cifs','nfs4']
all_without_fstype = __get_without_fstype(without_fstype)
for item in partitions:
if item.opts.startswith('rw') or (self.get_ro_is_filter() == 1 and item.opts.startswith('ro')):
# added by hetiulin, 2015/01/29
# trace the real path of a partition
if os.path.islink(item.device):
device = os.readlink(item.device)
else:
device = item.device
# end
name = device[device.rfind('/')+1:]
# lustre type: the name is the mount point, e.g. 10.0.x.x@tcp:/xxxxx/cfs/
if not name:
name = device[device.rfind(':')+1:]
# only collect once when there are multiple mount points
if disk_part.has_key(name):
continue
usage = psutil.disk_usage(item.mountpoint)
diskid = self.disk_tool.get_diskid_use_diskname(name)
iusage = os.statvfs(item.mountpoint)
disk_usage = round(100*float('%0.4f' % (float(usage.used)/(usage.used + usage.free))),2)
inode_usage = round(100*float('%0.4f' % (float(iusage.f_files - iusage.f_ffree)/iusage.f_files))) if iusage.f_files>0 else 0
ro = False
#cfs disk do not need to check
cfs = False
for fstype in without_fstype:
for line in all_without_fstype:
if((fstype == line['fstype']) and (item.mountpoint == line['mount_point'])):
cfs = True
break
if not cfs :
ro = __check_ro(item.mountpoint)
disk_part[name] = {
'diskType': item.fstype,
'diskSize': usage.total/1024/1024,
'diskUsage': disk_usage,
'diskRO': ro,
'inodeUsage': inode_usage,
'diskId': diskid
}
return disk_part
def record_disk_ro_info(self, disk_ro_info):
try:
with open(DiskCollect.disk_ro_file, 'w') as fd:
for key in disk_ro_info:
fd.write(key + "|" + disk_ro_info[key] +"\n")
except IOError :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
def get_disk_ro_info(self):
disk_ro_info = {}
try:
if os.path.exists(DiskCollect.disk_ro_file):
with open(DiskCollect.disk_ro_file) as fd:
for line in fd.readlines():
str = line.strip().split("|")
disk_ro_info[str[0]] = str[1]
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return disk_ro_info
def test_disk_collect():
disk_collect = DiskCollect()
time.sleep(5)
import json
print json.dumps(disk_collect.get_partition_info(), indent=1)
print (disk_collect.get_disk_ro_info())
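# Parse /proc/net/snmp: each protocol contributes a header line of field names
# followed by a line of values; return dicts for the Tcp, Udp and Ip sections.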
def get_snmp_info():
tcp_info = udp_info = ip_info = {}
try :
with open('/proc/net/snmp') as file :
for line in file:
if line.startswith('Tcp:'):
if 'CurrEstab' in line:
tcp_snmp_key = line.split()[1::]
else:
tcp_snmp_val = [int(x) for x in line.split()[1::]]
tcp_info = dict(zip(tcp_snmp_key, tcp_snmp_val))
continue
if line.startswith('Udp:'):
if 'OutDatagrams' in line:
udp_info_key = line.split()[1::]
else:
udp_info_val = [int(x) for x in line.split()[1::]]
udp_info = dict(zip(udp_info_key, udp_info_val))
continue
if line.startswith('Ip:'):
if 'ReasmFails' in line:
ip_snmp_key = line.split()[1::]
else:
ip_snmp_val = [int(x) for x in line.split()[1::]]
ip_info = dict(zip(ip_snmp_key, ip_snmp_val))
continue
except Exception:
# module-level function, so use the logging module directly instead of self.logger()
logging.error(traceback.format_exc().replace('\n', '\\n'))
return tcp_info, udp_info, ip_info
class TcpCollect(BaseProcess):
nf_conntrack = '/proc/net/stat/nf_conntrack'
ip_conntrack = '/proc/net/stat/ip_conntrack'
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self._tcp_time_await = 0
self._tcp_netstat = {}
self._tcpStatus = ['LISTEN','ESTABLISHED','CLOSE_WAIT','TIME_WAIT','SYN_SENT','SYN_RECV','FIN_WAIT1','FIN_WAIT2']
self._tcpStatusMap = {}
self._prev_data = {'timestamp': -1, 'tcp_conn': {'active_open': -1, 'passive_open': -1, 'curr_estab': -1, 'attempt_fails': -1}, 'udp_info': {'udp_in_datagrams': -1, 'udp_out_datagrams': -1, 'udp_in_errors': -1, 'udp_rcvbu_errors': -1, 'udp_sndbuf_errors': -1, 'ip_reasm_fails': -1}}
# get the sum of /proc/net/stat/*_conntrack grouped by column (values are hex)
def get_tcp_drop_stat(self):
arr = []
title = []
return_val = {}
val_list = []
conntrack_file_path = ''
if os.path.exists(self.nf_conntrack):
conntrack_file_path = self.nf_conntrack
elif os.path.exists(self.ip_conntrack):
conntrack_file_path = self.ip_conntrack
try :
arr=[]
if os.path.exists(conntrack_file_path):
with open(conntrack_file_path,"r") as file :
lines = file.readlines()
title = lines[0].strip().split()
for line in lines[1::] :
arr.append(line.strip().split())
else:
self.logger().info("conntrack file %s does not exist, please check your OS", conntrack_file_path)
arr = [[ int(val,16) for val in lt ] for lt in arr]
val_list = [sum(x) for x in zip(*arr)]
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return dict(zip(title, val_list))
'''
tcp connection additions per second
'''
def get_net_info(self):
tcp_info, udp_info, ip_info = get_snmp_info()
snmp_info = {'tcp_conn_active': -1, 'tcp_conn_passive': -1, 'tcp_attempt_fails': -1, 'tcp_curr_estab': -1, 'udp_recv_speed': -1, 'udp_send_speed': -1, 'udp_inerrors_speed': -1, 'udp_rcvbuerrors_speed': -1, 'udp_sndbuferrors_speed': -1, 'ip_reasmfails_speed': -1}
# Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors
active_open = int(tcp_info.get('ActiveOpens', 0))
passive_open = int(tcp_info.get('PassiveOpens', 0))
attempt_fails = int(tcp_info.get('AttemptFails', 0))
curr_estab = int(tcp_info.get('CurrEstab', 0))
# Udp: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors
udp_in_datagrams = int(udp_info.get('InDatagrams', 0))
udp_in_errors = int(udp_info.get('InErrors', 0))
udp_out_datagrams = int(udp_info.get('OutDatagrams', 0))
udp_rcvbu_errors = int(udp_info.get('RcvbufErrors', 0))
udp_sndbuf_errors = int(udp_info.get('SndbufErrors', 0))
# Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates
ip_reasm_fails = int(ip_info.get('ReasmFails', 0))
current_time = int(time.time())
if self._prev_data['timestamp'] > 0 and current_time > self._prev_data['timestamp']:
delta_time = current_time - self._prev_data['timestamp']
snmp_info['tcp_conn_active'] = float(active_open - self._prev_data['tcp_conn']['active_open']) / delta_time
snmp_info['tcp_conn_passive'] = float(passive_open - self._prev_data['tcp_conn']['passive_open']) / delta_time
snmp_info['tcp_attempt_fails'] = float(attempt_fails - self._prev_data['tcp_conn']['attempt_fails']) / delta_time
snmp_info['tcp_curr_estab'] = int(curr_estab)
snmp_info['udp_recv_speed'] = float(udp_in_datagrams - self._prev_data['udp_info']['udp_in_datagrams']) / delta_time
snmp_info['udp_send_speed'] = float(udp_out_datagrams - self._prev_data['udp_info']['udp_out_datagrams']) / delta_time
snmp_info['udp_inerrors_speed'] = float(udp_in_errors - self._prev_data['udp_info']['udp_in_errors']) / delta_time
snmp_info['udp_rcvbuerrors_speed'] = float(udp_rcvbu_errors - self._prev_data['udp_info']['udp_rcvbu_errors']) / delta_time
snmp_info['udp_sndbuferrors_speed'] = float(udp_sndbuf_errors - self._prev_data['udp_info']['udp_sndbuf_errors']) / delta_time
snmp_info['ip_reasmfails_speed'] = float(ip_reasm_fails - self._prev_data['udp_info']['ip_reasm_fails']) / delta_time
self._prev_data['timestamp'] = current_time
self._prev_data['tcp_conn'] = {'active_open': active_open, 'passive_open': passive_open, 'curr_estab': curr_estab, 'attempt_fails': attempt_fails}
self._prev_data['udp_info'] = {'udp_in_datagrams': udp_in_datagrams, 'udp_out_datagrams': udp_out_datagrams,'udp_in_errors': udp_in_errors, 'udp_rcvbu_errors': udp_rcvbu_errors,'udp_sndbuf_errors': udp_sndbuf_errors, 'ip_reasm_fails': ip_reasm_fails}
return snmp_info
def getTimeAwait(self):
tcp_stat = {}
try :
with open('/proc/net/sockstat') as file:
for line in file :
if 'TCP' in line :
line_info = line.split()
tcp_stat_key = line_info[1::2]
tcp_stat_val = [int(x) for x in line_info[2::2]]
tcp_stat = dict(zip(tcp_stat_key, tcp_stat_val))
break
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
self._tcp_time_await = tcp_stat.get('tw', 0)
return self._tcp_time_await
# get netstat statistics (TcpExt counters from /proc/net/netstat)
def getTcpNetstat(self):
tcp_net_info = {}
try :
with open('/proc/net/netstat') as file:
for line in file:
if line.startswith('TcpExt:'):
if 'ListenDrops' in line:
tcp_net_key = line.split()[1::]
else:
tcp_net_val = [int(x) for x in line.split()[1::]]
tcp_net_info = dict(zip(tcp_net_key, tcp_net_val))
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
self._tcp_netstat['ListenDrops'] = int(tcp_net_info.get('ListenDrops', 0))
self._tcp_netstat['TCPFastRetrans'] = int(tcp_net_info.get('TCPFastRetrans', 0))
self._tcp_netstat['TCPTimeouts'] = int(tcp_net_info.get('TCPTimeouts', 0))
self._tcp_netstat['ListenOverflows'] = int(tcp_net_info.get('ListenOverflows', 0))
self._tcp_netstat['TCPReqQFullDrop'] = int(tcp_net_info.get('TCPReqQFullDrop', 0))
self._tcp_netstat['PAWSPassive'] = int(tcp_net_info.get('PAWSPassive', 0))
return self._tcp_netstat
# get tcp status
def getTcpStatus(self):
for key in self._tcpStatus:
self._tcpStatusMap[key] = 0
cmd = "netstat -tan |grep ^tcp |awk '{++a[$6]} END{for (i in a) print i, a[i]}'"
out=[]
try:
process = os.popen(cmd)
out = process.read().split('\n')
process.close()
out = out[:-1]#delete the last empty line
for item in out:
item = item.strip().split(' ')
key = item[0]
value = item[1]
if key in self._tcpStatus:
self._tcpStatusMap[key] = int(value)
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return self._tcpStatusMap
def test_tcp_collect():
collect = TcpCollect()
print collect.get_net_info()
class ProcessCollect(BaseProcess):
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self.status_map = {
psutil.STATUS_IDLE: 0, psutil.STATUS_RUNNING: 1, psutil.STATUS_SLEEPING: 2, psutil.STATUS_DISK_SLEEP: 3,
psutil.STATUS_STOPPED: 4, psutil.STATUS_TRACING_STOP: 5, psutil.STATUS_ZOMBIE: 6, psutil.STATUS_DEAD: 7,
psutil.STATUS_WAKING: 8, psutil.STATUS_LOCKED: 9, psutil.STATUS_WAITING: 10
}
self._max_proc_pid = 0
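# count total, zombie (Z) and running (R) processes from `ps -eo state`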
def get_proc_num(self):
process_num = 0
zombie_num = 0
running_num = 0
command = "ps -eo state"
try:
output = CommUtils.ExecuteTimeoutCommand(command, 3)
lines = output.strip().split('\n')
except Exception as e:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return (process_num, zombie_num, running_num)
for line in lines[1:]:
try:
process_num += 1
state = line.strip()
if state == "R":
running_num += 1
if state == "Z":
zombie_num += 1
except Exception as e:
pass
return (process_num, zombie_num, running_num)
def get_max_proc_pid(self):
try:
cmd = "cat /proc/sys/kernel/pid_max"
process = os.popen(cmd)
out = process.readline().strip()
process.close()
self._max_proc_pid = int(out)
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return self._max_proc_pid
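# Aggregate status, CPU percent, RSS (MB) and longest running time over all
# processes whose name matches proc_name.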
def get_proc_info(self, proc_name):
proc_status = -1
proc_cpu_rate = 0
proc_mem = 0
proc_time = 0
proc_num = 0
for p in psutil.process_iter():
try:
if p.name() == proc_name:
proc_num += 1
proc_info = p.as_dict(attrs=['status', 'create_time', 'cpu_percent', 'memory_info'])
if ((proc_status == -1) or (proc_info['status'] == psutil.STATUS_RUNNING)):
proc_status = self.status_map[proc_info['status']]
proc_cpu_rate += proc_info['cpu_percent']
proc_mem += proc_info['memory_info'].rss/1024/1024
startTime = int(time.time() - proc_info['create_time'])
if (startTime > proc_time):
proc_time = startTime
except Exception as e:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return (proc_status, proc_cpu_rate, proc_mem, proc_time, proc_num)
def get_proc_info_by_pid(self, pid_set):
process_info_dict = {}
for item in pid_set:
try:
proc = psutil.Process(pid = item)
proc_info_list = []
#process name
proc_info_list.append(proc.name())
#process pid
proc_info_list.append(proc.pid)
#process username
proc_info_list.append(proc.username())
#process cmdline
proc_info_list.append(' '.join(proc.cmdline()))
#process cpu usage
proc_info_list.append(proc.cpu_percent(interval=1))
#process fd number
proc_info_list.append(proc.num_fds())
#process memory
proc_info_list.append(proc.memory_info().rss/1024/1024)
#process status
if proc.status() in self.status_map.keys():
proc_info_list.append(self.status_map[proc.status()])
else:
#current psutil cannot report the idle status, so default to 0
proc_info_list.append(0)
#process running time
proc_info_list.append(int(time.time() - proc.create_time()))
process_info_dict[item] = proc_info_list
except Exception as e:
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return process_info_dict
def test_process_collect():
collect = ProcessCollect()
print collect.get_proc_num()
print collect.get_proc_info('moagent')
print collect.get_proc_info_by_pid([1])
class NetworkCollect(BaseProcess):
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self._prev_data = {}
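# Per-interface rates since the previous call: traffic in bits/s, packets/s and the
# dropped-packet delta; returns -1 values on the first call for an interface.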
def get_network_info(self, interface_name):
(in_traffic, out_traffic, in_packet, out_packet, drop_packet) = (-1, -1, -1, -1, -1)
net_io_infos = psutil.net_io_counters(True)
if net_io_infos.has_key(interface_name):
info = net_io_infos.get(interface_name)
current_time = int(time.time())
if (self._prev_data.has_key(interface_name)
and self._prev_data[interface_name].has_key('timestamp')
and current_time > self._prev_data[interface_name]['timestamp']):
delta_time = current_time - self._prev_data[interface_name]['timestamp']
prev_info = self._prev_data[interface_name]['data']
out_traffic = float(info.bytes_sent - prev_info.bytes_sent)/delta_time*8
in_traffic = float(info.bytes_recv - prev_info.bytes_recv)/delta_time*8
out_packet = float(info.packets_sent - prev_info.packets_sent)/delta_time
in_packet = float(info.packets_recv - prev_info.packets_recv)/delta_time
drop_packet = info.dropin + info.dropout - prev_info.dropin - prev_info.dropout
self._prev_data[interface_name] = { 'data' : info, 'timestamp' : current_time }
return (in_traffic, out_traffic, in_packet, out_packet, drop_packet)
def test_network_collect():
collector = NetworkCollect()
print 'eth0 : ', collector.get_network_info('eth0')
print 'eth1 : ', collector.get_network_info('eth1')
time.sleep(5)
print 'eth0 : ', collector.get_network_info('eth0')
print 'eth1 : ', collector.get_network_info('eth1')
class SysCollect(BaseProcess):
runfile = '.RUNTIME'
MAX_REPORT_UPTIME = float(600)
def __init__(self):
BaseProcess.__init__(self, constant.PLUGIN_CONFIG_PATH, self.__class__.__name__)
self._last_run_time = 0.0
try:
if not os.path.exists(SysCollect.runfile):
self._last_run_time = self._get_run_time()
with open(SysCollect.runfile, 'w') as fd:
fd.write('')
with open(SysCollect.runfile) as fd:
if self._get_run_time() < SysCollect.MAX_REPORT_UPTIME:
content = fd.readline().strip()
# a freshly created runfile is empty; keep the default in that case
if content:
self._last_run_time = float(content)
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
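# Compare the current /proc/uptime with the last recorded value: a smaller current
# uptime means the machine rebooted since the previous report.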
def get_boot_time(self):
self._curr_run_time = self._get_run_time()
if self._last_run_time > self._curr_run_time:
self._last_run_time = self._curr_run_time
reboot = True
else:
reboot = False
self._record_run_time()
return reboot, self._curr_run_time
def _record_run_time(self):
try:
with open(SysCollect.runfile, 'w') as fd:
fd.write(str(self._curr_run_time))
except IOError :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
def _get_run_time(self):
runtime = -1
try:
with open("/proc/uptime") as fd:
content = fd.read()
runtime = float(content.split()[0])
except Exception :
self.logger().error(traceback.format_exc().replace('\n', '\\n'))
return runtime
def test_sys_collect():
collector = SysCollect()
print collector.get_boot_time()
if __name__ == '__main__':
#print 'cpu:'
test_cpu_collect()
#print 'disk:'
#test_disk_collect()
#print 'network:'
#test_network_collect()
#print 'tcp:'
#test_tcp_collect()
#print 'mem:'
#test_mem_collect()
#print 'sys:'
#test_sys_collect()
#print("process:")
#test_process_collect()