# File: //usr/local/qcloud/monitor/barad/plugin/collector/vm/disk.py
import sys
import os,time
import re
sys.path.append(os.getcwd() + '/../../../comm/')
import constant
from plugin_base import VmBaseCollector
from utils.metric_handler import MetricHandler
from cutils import CommUtils
from utils.collect_tool import DiskCollect
class DiskCollector(VmBaseCollector):
    """Collect per-partition and per-disk metrics for a VM.

    Each collection pass reports partition size/usage/read-only state, emits
    ``disk_readonly`` events when a partition enters or leaves the read-only
    state, reports per-disk I/O metrics when an NVMe controller was detected,
    and, at most once per hour, runs the fstab / DNS / vpcGateway
    configuration checks.
    """

    # Minimum number of seconds between two runs of the configuration checks.
    _CONFIG_CHECK_INTERVAL = 60 * 60

    # Groups of nameserver addresses. The DNS check passes when every address
    # of at least one group is present in /etc/resolv.conf.
    _EXPECTED_NAMESERVERS = [
        ['183.60.83.19', '183.60.82.98'], ['127.0.0.53'], ['10.112.65.31', '10.112.65.32'], ['10.59.218.193', '10.59.218.194'],
        ['100.121.190.140', '100.121.190.141'], ['10.236.158.114', '10.236.158.106'], ['10.53.216.182', '10.53.216.198'], ['10.48.46.77', '10.48.46.27'],
        ['100.83.224.91', '100.83.224.88'], ['10.116.19.188', '10.116.19.185'], ['10.243.28.52', '10.164.55.3'], ['100.78.90.19', '100.78.90.8'],
        ['10.59.218.18', '10.112.65.51'], ['100.88.222.14', '100.88.222.16'], ['100.102.22.21', '100.102.22.30'], ['100.120.52.60', '100.120.52.61'],
        ['10.165.180.53', '10.165.180.62'],
    ]

    def init(self):
        """Set the collection frequency and create the objects used while collecting."""
        self.set_frequency(10)
        self.collector = DiskCollect()
        self.handler = MetricHandler()
        self.handler.namespace = 'qce/cvm'
        self.handler.dimensions = ['vm_uuid', 'vmip', 'diskid']
        # Previously recorded read-only partitions; used below as a mapping
        # of partition name -> read-only description.
        self.disk_ro = self.collector.get_disk_ro_info()
        self.is_nvme = self.init_nvme_vm()
        # Cache of device node -> address, filled by get_address_from_dev().
        self.nvme_address_info = {}
        # 0 means the configuration checks have never run.
        self._last_report_event_time = 0

    def init_nvme_vm(self):
        """Return 1 when ``lspci`` lists an NVMe controller, otherwise 0."""
        cmd = 'lspci |grep "Non-Volatile memory controller"'
        output = CommUtils.ExecuteTimeoutCommand(cmd, 3)
        # Truthiness also covers a missing (None) output, which len() would
        # have turned into a TypeError.
        return 1 if output else 0

    def get_address_from_dev(self, name):
        """Return the address string of the device node *name*.

        *name* must contain ``/dev/`` (for example ``/dev/nvme0n1``). The
        value is read from ``/sys/block/<dev>/device/address`` when that file
        exists; otherwise the first 12 characters of the ``udevadm`` pipeline
        output are used. Results are cached per device name.
        """
        # we can promise nvme address won't change in moa lifecycle,
        # add or replace nvme device must reboot vm
        if name in self.nvme_address_info:
            return self.nvme_address_info[name]
        sysfs_path = '/sys/block/%s/device/address' % name.split('/dev/')[1]
        if os.path.exists(sysfs_path):
            with open(sysfs_path, 'r') as f:
                output = f.read().strip()
        else:
            cmd = "udevadm info -q all -n %s |head -n 1 |awk -F '/' '{print $5}'" % name
            output = CommUtils.ExecuteTimeoutCommand(cmd, 3)[0:12]
        self.nvme_address_info[name] = output
        return output

    def _send_disk_readonly_event(self, vm_uuid, disk_name, readonly_type, status):
        """Queue a ``disk_readonly`` event for *disk_name* with the given *status*.

        The callers use status 1 when a partition is first seen read-only and
        status 0 when a recorded partition is writable again or has vanished.
        """
        timestamp = int(time.time())
        event = {
            "version": 1,
            "Action": "SendEventAlarm",
            "caller": "cvm",
            "callee": "QCMonitor",
            "productName": "cvm",
            "timestamp": timestamp,
            "occurTime": timestamp,
            "dimensions": [{"key": "uuid", "value": vm_uuid}],
            "eventName": "disk_readonly",
            "status": status,
            "additionalMsg": [
                {"key": "name", "value": disk_name},
                {"key": "type", "value": readonly_type},
            ],
        }
        self.put_data({'sender': 'event_sender', 'datas': event})

    def _clear_disk_readonly(self, vm_uuid, disk_name):
        """Drop *disk_name* from the read-only record, persist it and send a status-0 event."""
        readonly_type = self.disk_ro.pop(disk_name)
        self.collector.record_disk_ro_info(self.disk_ro)
        self._send_disk_readonly_event(vm_uuid, disk_name, readonly_type, 0)

    def _flush_metrics(self):
        """Queue the metrics accumulated in the handler, if there are any."""
        if len(self.handler.get_metrics()) > 0:
            self.put_data({'sender': 'nws_sender', 'datas': self.handler.pop_metrics()})

    def _report_config_flag(self, metric_name, flag, vm_uuid, vmip):
        """Queue a single configuration-check metric *metric_name* with value *flag*."""
        dimensions = {'vm_uuid': vm_uuid, 'vmip': vmip}
        self.handler.add_batch_metric(batch=[{'name': metric_name, 'value': flag}], dimensions=dimensions)
        self.put_data({'sender': 'nws_sender', 'datas': self.handler.pop_metrics()})

    def do_collect(self):
        """Run one collection pass: partition metrics, read-only events, NVMe I/O metrics, hourly checks."""
        vm_uuid = self.get_vm_uuid()
        vmip = self.get_vmip()
        disk_info = self.collector.get_disk_info()
        disk_part_info = self.collector.get_partition_info()

        # In case of disk by umount or detach: a recorded read-only partition
        # that is no longer listed gets a status-0 event.
        # Iterate over a copy because entries are removed inside the loop.
        for key in list(self.disk_ro.keys()):
            if key not in disk_part_info:
                self._clear_disk_readonly(vm_uuid, key)

        for key in disk_part_info.keys():
            part = disk_part_info[key]
            dimensions = {'vm_uuid': vm_uuid, 'vmip': vmip, 'diskName': key, 'diskId': part['diskId'], 'diskType': part['diskType']}
            if part['diskId'] == "":
                # No disk id available: synthesize one from the VM uuid and partition name.
                dimensions["diskId"] = 'disk-' + vm_uuid + '-' + key

            disk_total = part['diskSize']
            disk_usage = part['diskUsage']
            inode_usage = part['inodeUsage']

            # 0: writable, 1: read-only, 2: read-only with inode or space usage at 100 or more.
            if not part['diskRO']:
                disk_readonly = 0
            elif inode_usage >= 100 or disk_usage >= 100:
                disk_readonly = 2
            else:
                disk_readonly = 1

            if disk_readonly != 0:
                # Only the transition into read-only sends an event.
                if key not in self.disk_ro:
                    readonly_type = "disk readonly"
                    if inode_usage >= 100:
                        readonly_type += " inode usage full"
                    if disk_usage >= 100:
                        readonly_type += " size usage full"
                    self.disk_ro[key] = readonly_type
                    self.collector.record_disk_ro_info(self.disk_ro)
                    self._send_disk_readonly_event(vm_uuid, key, readonly_type, 1)
            elif key in self.disk_ro:
                # Partition was recorded as read-only and is writable again.
                self._clear_disk_readonly(vm_uuid, key)

            batch_metric_part = [
                {'name': 'disk_total', 'value': disk_total, 'unit': 'mib'},
                {'name': 'disk_usage', 'value': disk_usage},
                {'name': 'disk_readonly', 'value': disk_readonly},
                {'name': 'inode_usage', 'value': inode_usage},
            ]
            self.handler.add_batch_metric(batch=batch_metric_part, dimensions=dimensions)
        self._flush_metrics()

        # just for nvme vm
        if self.is_nvme == 1:
            for key in disk_info.keys():
                stats = disk_info[key]
                address = self.get_address_from_dev('/dev/%s' % key)
                dimensions = {'vm_uuid': vm_uuid, 'vmip': vmip, 'diskname': key, 'address': address}
                batch_metric_disk = [
                    {'name': 'vm_disk_read_traffic', 'value': stats['diskReadtraff'], 'unit': 'KBps'},
                    {'name': 'vm_disk_write_traffic', 'value': stats['diskWritetraff'], 'unit': 'KBps'},
                    {'name': 'vm_disk_read_iops', 'value': stats['disk_readiops'], 'unit': 'countps'},
                    {'name': 'vm_disk_write_iops', 'value': stats['disk_writeiops'], 'unit': 'countps'},
                    {'name': 'vm_disk_tmpio', 'value': stats['disk_tmpio']},
                    {'name': 'vm_disk_util', 'value': stats['disk_ioutil']},
                    {'name': 'vm_disk_await', 'value': stats['diskAwait'], 'unit': 'ms'},
                ]
                self.handler.add_batch_metric(batch=batch_metric_disk, dimensions=dimensions)
            self._flush_metrics()

        # Configuration checks run on the first pass and then once the
        # interval has elapsed since the previous run.
        now = int(time.time())
        if self._last_report_event_time == 0 or now - self._last_report_event_time >= self._CONFIG_CHECK_INTERVAL:
            self.fstab_configation_check()
            self.dns_configation_check()
            self.vpcgateway_configation_check()
            self._last_report_event_time = now

    @staticmethod
    def _parse_fstab(path):
        """Return ``(device, mount_point, filesystem_type)`` tuples read from *path*.

        Only lines with at least four fields that start with ``/dev``,
        ``UUID`` or ``LABEL`` are kept. For ``/dev/disk/by-*/X`` devices only
        ``X`` is returned; for ``UUID=X`` / ``LABEL=X`` only ``X``.
        A missing file yields an empty list.
        """
        entries = []
        if not os.path.exists(path):
            return entries
        with open(path, "r") as fd:
            for line in fd:
                fields = line.split()
                if len(fields) < 4:
                    continue
                device = fields[0].split("=")[-1]
                mount_point = fields[1]
                filesystem_type = fields[2]
                if line.startswith("/dev"):
                    # ubuntu contain config: /dev/disk/by-uuid/uuid1 / ext4 defaults 0 1,
                    # so can't use full path when search device path, only use device uuid
                    for marker in ("/by-uuid/", "/by-label/", "/by-path/", "/by-id/"):
                        index = device.find(marker)
                        if index != -1:
                            device = device[index + len(marker):]
                            break
                    entries.append((device, mount_point, filesystem_type))
                elif line.startswith("UUID") or line.startswith("LABEL"):
                    # debian contain config : UUID=uuid1 / ext4 errors=remount-ro 0
                    entries.append((device, mount_point, filesystem_type))
        return entries

    def fstab_configation_check(self):
        """Validate ``/etc/fstab`` entries and report the ``e_cfg_fstab`` metric.

        The metric is 1 when any kept entry names a device that ``lsblk``
        does not list, a mount point that does not exist, or a filesystem type
        different from the one ``lsblk`` reports; otherwise 0. Any exception is
        logged and no metric is reported for that run.
        """
        vm_uuid = self.get_vm_uuid()
        vmip = self.get_vmip()
        fstab = "/etc/fstab"
        fstab_configraion_error_flag = 0
        errmsg = ""
        try:
            for device, mount_point, filesystem_type in self._parse_fstab(fstab):
                # NOTE(review): the device string from fstab is interpolated
                # into a shell pipeline and used as a grep pattern unquoted.
                cmd = "lsblk -p -f | grep " + device + " | awk '{print $2}'"
                out = CommUtils.ExecuteTimeoutCommand(cmd, 3).split("\n")[0]
                # if device isn't exist, don't check mount point and filesystem type;
                # if mount point isn't exist, don't check filesystem type;
                if out == "":
                    fstab_configraion_error_flag = 1
                    errmsg += "device " + device + " isn't exist;"
                    continue
                if os.path.islink(mount_point):
                    # A symlinked mount point is checked through its link target.
                    mount_point = os.readlink(mount_point)
                if not os.path.exists(mount_point):
                    fstab_configraion_error_flag = 1
                    errmsg += "mount point " + mount_point + " isn't exist;"
                elif out != filesystem_type:
                    fstab_configraion_error_flag = 1
                    errmsg += "dev " + device + " filesystem type isn't " + filesystem_type + ";"
            if errmsg:
                self.logger().error("fstab_configation_check: %s" % errmsg)
            self._report_config_flag('e_cfg_fstab', fstab_configraion_error_flag, vm_uuid, vmip)
        except Exception as e:
            self.logger().error("fstab_configation_check error: %s" % e)

    @staticmethod
    def _file_contains(path, key_word):
        """Return True when *path* exists and one of its lines contains *key_word*."""
        if not os.path.exists(path):
            return False
        with open(path, 'r') as fd:
            for line in fd:
                if key_word in line:
                    return True
        return False

    @staticmethod
    def _default_nic_name():
        """Return the interface named on the first ``ip route`` line.

        Falls back to the first column of the second line of
        ``/proc/net/route``, and to an empty string when neither is available.
        """
        cmd = "ip route "
        cmd_out = CommUtils.ExecuteTimeoutCommand(cmd, 3).split("\n")
        match = re.search(r"dev (\w+)", cmd_out[0])
        if match:
            return match.group(1)
        with open("/proc/net/route", "r") as route_file:
            lines = route_file.readlines()
        if len(lines) > 1:
            return lines[1].split()[0]
        return ""

    def dns_configation_check(self):
        """Check the nameservers in ``/etc/resolv.conf`` and report ``e_cfg_dns_overwrite``.

        The nameserver comparison is skipped (metric stays 0) when one of the
        inspected network configuration files contains its DNS-related keyword
        (``PEERDNS=no``, ``dns=none``, ``dns-nameservers peerdns``,
        ``use-dns: false``). Otherwise the metric is 1 when no expected
        nameserver group is fully present in resolv.conf. When resolv.conf
        does not exist, nothing is reported.
        """
        vm_uuid = self.get_vm_uuid()
        vmip = self.get_vmip()
        dns_config_path = "/etc/resolv.conf"
        dns_configraion_error_flag = 0
        dns_configraion_check_flag = 0
        errmsg = ""
        try:
            nic_name = self._default_nic_name()
            keyword_files = [
                ("/etc/sysconfig/network-scripts/ifcfg-" + nic_name, "PEERDNS=no"),
                ("/etc/sysconfig/network/ifcfg-" + nic_name, "PEERDNS=no"),
                ("/etc/NetworkManager/conf.d/90-dns-none.conf", "dns=none"),
                ("/etc/network/interfaces", "dns-nameservers peerdns"),
            ]
            for path, key_word in keyword_files:
                if self._file_contains(path, key_word):
                    dns_configraion_check_flag = 1
            yaml_files_path = "/etc/netplan"
            if os.path.exists(yaml_files_path):
                for file_name in os.listdir(yaml_files_path):
                    if not file_name.endswith('.yaml'):
                        continue
                    if self._file_contains(yaml_files_path + '/' + file_name, "use-dns: false"):
                        dns_configraion_check_flag = 1
        except Exception as e:
            # A failure here leaves the flag as found so far; the nameserver
            # comparison below still runs.
            self.logger().error("dns_configation_check error: %s" % e)
        try:
            if dns_configraion_check_flag != 1:
                if not os.path.exists(dns_config_path):
                    print("dns sys config path:" + dns_config_path + " isn't exist!")
                    return
                cmd = "awk '/^nameserver/{print $2}' " + dns_config_path
                cmd_out = CommUtils.ExecuteTimeoutCommand(cmd, 3).split("\n")
                configured = set(cmd_out)
                if not any(set(group) <= configured for group in self._EXPECTED_NAMESERVERS):
                    dns_configraion_error_flag = 1
                    errmsg += "target dns server " + str(self._EXPECTED_NAMESERVERS) + " isn't in dns server config " + str(cmd_out) + ";"
            if errmsg:
                self.logger().error("dns_configation_check: %s" % errmsg)
            self._report_config_flag('e_cfg_dns_overwrite', dns_configraion_error_flag, vm_uuid, vmip)
        except Exception as e:
            self.logger().error("dns_configation_check error: %s" % e)

    def vpcgateway_configation_check(self):
        """Report the ``e_cfg_vpcgateway_sh`` metric.

        The metric is 1 only when ``/usr/local/sbin/vpcGateway.sh`` exists and
        ``/etc/rc.d/rc.local`` exists and contains that path; otherwise 0.
        Any exception is logged and no metric is reported for that run.
        """
        vm_uuid = self.get_vm_uuid()
        vmip = self.get_vmip()
        vpcgateway_config_path = "/usr/local/sbin/vpcGateway.sh"
        sys_rc_local_path = "/etc/rc.d/rc.local"
        vpcgateway_configraion_error_flag = 0
        try:
            # A missing script, a missing rc.local, or an rc.local that does
            # not mention the script all leave the flag at 0.
            if os.path.exists(vpcgateway_config_path) and os.path.exists(sys_rc_local_path):
                with open(sys_rc_local_path) as fd:
                    rc_local_file = fd.read()
                if rc_local_file.find(vpcgateway_config_path) != -1:
                    vpcgateway_configraion_error_flag = 1
            self._report_config_flag('e_cfg_vpcgateway_sh', vpcgateway_configraion_error_flag, vm_uuid, vmip)
        except Exception as e:
            self.logger().error("vpcgateway_configation_check error: %s" % e)
def main():
    """Create and initialise a DiskCollector, then collect and dump data in an endless loop.

    Each iteration is followed by a 10 second sleep; the function never returns.
    """
    disk_collector = DiskCollector()
    disk_collector.init()
    while True:
        disk_collector.collect()
        disk_collector.dump_data()
        time.sleep(10)


# Run the collector loop only when this file is executed as a script.
if __name__ == '__main__':
    main()