I've created a script to access the controller's probe_config_get to create 3 CSV files of the CDM probe (CPU, DISK, Memory) for an administrative overview of what our thresholds are on each host in Infrastructure Manager.
The script works to a certain extent...
It starts writing the CSV files (accurately) but after a few minutes I get an error message pop up:
"The execute request failed: communication error"
Which is puzzling because the script keeps running and writing to the file but although it DOES keep writing, eventually it will stop and never make it through the entire hub list.
I have investigated the hosts where it seems to stop on but I cannot determine what the problem is as they are live and accessible through the controller and I am able to login to the hosts etc
Below is the code. Any help is greatly appreciated.
-- -- Extract the CDM configuration file from the controller as a table and -- access parts of the configuration, and print the contents sorted by section. -- fname1 = "hublist/CPU.csv" fname2 = "hublist/Memory.csv" fname3 = "hublist/Disk.csv" --create file and create headers --CPU = fname1 file.create (fname1) file.write(fname1, "Facility,Hostname,Interval,Samples,QoS,AlarmActive,ErrorActive,ErrorThreshold,WarningActive,Warning(Theshold)\n") --Memory = fname2 file.create (fname2) file.write(fname2,"Facility,Hostname,Interval,Samples,QoS,AlarmActive,(PF)ErrorActive,(PF)ErrorThreshold,(PF)WarningActive,(PF)WarningThreshold,(PH)ErrorActive,(PH)ErrorThreshold,(PH)WarningActive,(PH)WarningThreshold,(SW)ErrorActive,(SW)ErrorThreshold,(SW)WarningActive,(SW)WarningThreshold\n") --Disk = fname3 file.create (fname3) file.write(fname3,"Facility,Hostname,Drive,Active,Percent,QoSDiskUsage,QoSDiskUsagePerc,InodePerc,QoSInodeUsage,QoSInodeUsagePerc,DeltaPerc,DeltaCalcAll,DeltaType,QoSDiskDelta,CriticalActive,CriticalThreshold,MajorActive,MajorThreshold,InodeCriticalActive,InodeCriticalThreshold,InodeMajorActive,InodeMajorThreshold,DeltaCriticalActive,DeltaCriticalThreshold,DeltaMajorActive,DeltaMajorThreshold\n") args = pds.create() pds.putString(args,"name","cdm") domain = "CHS" hl = nimbus.request ("hub","gethubs",args) --Iterate through each Hub for h_row,h_entry in pairs(hl.hublist) do if h_entry.domain == domain then local mypds = pds.create() local r_resp,rc = nimbus.request(h_entry.addr, "getrobots") --There's an else at the bottom if the RC is not 0 but I never get the error printout if rc == 0 then --Iterate through each Host for r_row,r_entry in pairs(r_resp.robotlist) do local mypds2 = pds.create() cfg,rc = nimbus.request ("/CHS/"..h_entry.name.."/"..r_entry.name.."/controller", "probe_config_get",args) -- Extract the named section '/cpu' --There's an else at the bottom if the RC is not 0 but I never get the error printout if rc == 0 then --CPU Leafs cpu = cfg["/cpu"] cpu_alarm = cfg["/cpu/alarm"] cpu_alarm_error = cfg["/cpu/alarm/error"] cpu_alarm_warning = cfg["/cpu/alarm/warning"] --CPU --Headers: Facility, Hostname, Interval, Samples, QoS, AlarmActive, ErrorActive, ErrorThreshold, WarningActive, Warning(Theshold) --------------------------------------------------------- if cpu ~= nil then file.write(fname1,h_entry.name..",") file.write(fname1,r_entry.name..",") file.write(fname1,cpu.interval..",") file.write(fname1,cpu.samples..",") file.write(fname1,cpu.qos_cpu_usage..",") end if cpu_alarm ~= nil then file.write(fname1,cpu_alarm.active..",") end if cpu_alarm_error ~= nil then file.write(fname1,cpu_alarm_error.active..",") file.write(fname1,cpu_alarm_error.threshold..",") end if cpu_alarm_warning ~= nil then file.write(fname1,cpu_alarm_warning.active..",") file.write(fname1,cpu_alarm_warning.threshold.."\n") end --Memory --Headers: Hub,Host,Interval, Samples, QoS, AlarmActive, (PF)ErrorActive, (PF)ErrorThreshold, (PF)WarningActive, (PF)WarningThreshold, (PH)ErrorActive, --(PH)ErrorThreshold, (PH)WarningActive, (PH)WarningThreshold, (SW)ErrorActive, (SW)ErrorThreshold, (SW)WarningActive, (SW)WarningThreshold --------------------------------------------------------- --Memory Leafs memory = cfg["/memory"] memory_alarm = cfg["/memory/alarm"] memory_alarm_pagefile_error = cfg["/memory/alarm/pagefile error"] memory_alarm_pagefile_warning = cfg["/memory/alarm/pagefile warning"] memory_alarm_physical_error = cfg["/memory/alarm/physical error"] memory_alarm_physical_warning = cfg["/memory/alarm/physical warning"] memory_alarm_swap_error = cfg["/memory/alarm/swap error"] memory_alarm_swap_warning = cfg["/memory/alarm/swap warning"] --Interval if memory ~= nil then file.write(fname2,h_entry.name..",") file.write(fname2,r_entry.name..",") file.write(fname2,memory.interval..",") file.write(fname2,memory.samples..",") file.write(fname2,memory.qos_memory_usage..",") else file.write(fname2,",") end --Overall Alarm Status if memory_alarm ~= nil then file.write(fname2,memory_alarm.active..",") else file.write(fname2,",") end --Pagefile Error if memory_alarm_pagefile_error ~= nil then file.write(fname2,memory_alarm_pagefile_error.active..",") file.write(fname2,memory_alarm_pagefile_error.threshold..",") else file.write(fname2,",") end --Pagefile Warning if memory_alarm_pagefile_error ~= nil then file.write(fname2,memory_alarm_pagefile_warning.active..",") file.write(fname2,memory_alarm_pagefile_warning.threshold..",") else file.write(fname2,",") end --Physical Error if memory_alarm_physical_error ~= nil then file.write(fname2,memory_alarm_physical_error.active..",") file.write(fname2,memory_alarm_physical_error.threshold..",") else file.write(fname2,",") end --Physical Warning if memory_alarm_physical_warning ~= nil then file.write(fname2,memory_alarm_physical_warning.active..",") file.write(fname2,memory_alarm_physical_warning.threshold..",") else file.write(fname2,",") end --Swap Error if memory_alarm_swap_error ~= nil then file.write(fname2,memory_alarm_swap_error.active..",") file.write(fname2,memory_alarm_swap_error.threshold..",") else file.write(fname2,",") end --Swap Warning if memory_alarm_swap_warning ~= nil then file.write(fname2,memory_alarm_swap_warning.active..",") file.write(fname2,memory_alarm_swap_warning.threshold.."\n") else file.write(fname2,",\n") end --DISK --Headers: --Drive --Active --Percent --QoSDiskUsage --QoSDiskUsagePerc --InodePerc --QoSInodeUsage --QoSInodeUsagePerc --DeltaPerc --DeltaCalcAll --DeltaType --QoSDiskDelta --CriticalActive --CriticalThreshold --MajorActive --MajorThreshold --InodeCriticalActive --InodeCriticalThreshold --InodeMajorActive --InodeMajorThreshold --DeltaCriticalActive --DeltaCriticalThreshold --DeltaMajorActive --DeltaMajorThreshold --------------------------------------------------------------------------------- --Disk Leafs disk = cfg["/disk"] disk_alarm = cfg["/disk/alarm"] disk_alarm_fixed = cfg["^/disk/alarm/fixed/([^/]+)$"] local filesystems = {} for section,conf in pairs(cfg) do fs = string.match(section, "^/disk/alarm/fixed/([^/]+)$") if fs ~= nil then table.insert(filesystems, fs) end end table.sort(filesystems) for i,fs in pairs(filesystems) do local fs_name = string.gsub(fs, "#", "/") local disk = cfg["/disk/alarm/fixed/"..fs..""] local disk_error = cfg["/disk/alarm/fixed/"..fs.."/error"] local disk_warning = cfg["/disk/alarm/fixed/"..fs.."/warning"] local disk_inode_error = cfg["/disk/alarm/fixed/"..fs.."/inode_error"] local disk_inode_warning = cfg["/disk/alarm/fixed/"..fs.."/inode_warning"] local disk_delta_error = cfg["/disk/alarm/fixed/"..fs.."/delta_error"] local disk_delta_warning = cfg["/disk/alarm/fixed/"..fs.."/delta_warning"] --Disk/Alarm/Fixed/DiskLetter --I noticed that not every config file has each one of these fields so I check first to see if it exists --I'm sure there's a better way to do this but I'm new to LUA file.write(fname3,h_entry.name..",") file.write(fname3,r_entry.name..",") if disk.description ~= nil then file.write(fname3,disk.description..",") else file.write(fname3,",") end if disk.active ~= nil then file.write(fname3,disk.active..",") else file.write(fname3,",") end if disk.percent ~= nil then file.write(fname3,disk.percent..",") else file.write(fname3,",") end if disk.qos_disk_usage ~= nil then file.write(fname3,disk.qos_disk_usage..",") else file.write(fname3,",") end if disk.qos_disk_usage_perc ~= nil then file.write(fname3,disk.qos_disk_usage_perc..",") else file.write(fname3,",") end if disk.inode_percent ~= nil then file.write(fname3,disk.inode_percent..",") else file.write(fname3,",") end if disk.qos_inode_usage ~= nil then file.write(fname3,disk.qos_inode_usage..",") else file.write(fname3,",") end if disk.qos_inode_usage_perc ~= nil then file.write(fname3,disk.qos_inode_usage_perc..",") else file.write(fname3,",") end if disk.delta_percent ~= nil then file.write(fname3,disk.delta_percent..",") else file.write(fname3,",") end if disk.delta_calculate_all ~= nil then file.write(fname3,disk.delta_calculate_all..",") else file.write(fname3,",") end if disk.delta_type ~= nil then file.write(fname3,disk.delta_type..",") else file.write(fname3,",") end if disk.qos_disk_delta ~= nil then file.write(fname3,disk.qos_disk_delta..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/Error if disk_error.active ~= nil then file.write(fname3,disk_error.active..",") else file.write(fname3,",") end if disk_error.threshold ~= nil then file.write(fname3,disk_error.threshold..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/Warning if disk_warning.active ~= nil then file.write(fname3,disk_warning.active..",") else file.write(fname3,",") end if disk_warning.threshold ~= nil then file.write(fname3,disk_warning.threshold..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/InodeError if disk_inode_error.active ~= nil then file.write(fname3,disk_inode_error.active..",") else file.write(fname3,",") end if disk_inode_error.threshold ~= nil then file.write(fname3,disk_inode_error.threshold..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/InodeWarning if disk_inode_warning.active ~= nil then file.write(fname3,disk_inode_warning.active..",") else file.write(fname3,",") end if disk_inode_warning.threshold ~= nil then file.write(fname3,disk_inode_warning.threshold..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/DeltaError if disk_delta_error.active ~= nil then file.write(fname3,disk_delta_error.active..",") else file.write(fname3,",") end if disk_delta_error.threshold ~= nil then file.write(fname3,disk_delta_error.threshold..",") else file.write(fname3,",") end ----Disk/Alarm/Fixed/DiskLetter/DeltaWarning if disk_delta_warning.active ~= nil then file.write(fname3,disk_delta_warning.active..",") else file.write(fname3,",") end if disk_delta_warning.threshold ~= nil then file.write(fname3,disk_delta_warning.threshold.."\n") else file.write(fname3,",") end end else print("Received error", rc, " on request to ", r_entry.name) end end print("Received error", rc, " on request to ", h_entry.name) end end end