chris.naude

LUA - Auto Ping All Robots

Discussion created by chris.naude on Apr 7, 2011

This is a script I put together to ping all robots in all hubs. Adding hosts to net_connect seems to be too much of a manual process; I would prefer that hubs just automatically ping every robot. I used the rawalarm code from here. I have this scheduled to run from the NAS every 5 minutes. It's able to ping 400+ robots within the 5-minute window. I'm sure there is a better way to do this, but it works for us.

 

 

-- Auto Ping All Robots
--
-- Asks the primary hub for the list of hubs, asks every hub for its robots,
-- pings each robot and posts a raw alarm per robot: a clear when the ping
-- succeeds, a critical when it does not.
--
-- Relies on names supplied by the NAS scripting environment: nimbus, pds,
-- timestamp, action, mid, sprintf, printf, SCRIPT_NAME, NIML_CLEAR and
-- NIML_CRITICAL.

-- Configuration.
debug = 0                -- 0 = silent, 1 = per hub/robot result, 2 = also dump robot fields
hub = "PRIMARY HUB"      -- hub (and robot) name used to address the "gethubs" request
my_domain = "DOMAIN"     -- domain of that hub

-----------------------------------------------------------------------------------------
-- unique_id ()
-- Generates a nimid: two random upper-case letters, 8 digits, "-", 5 digits.
---
function unique_id()
   local base = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

   -- Two values are drawn and thrown away before use (kept from the original;
   -- presumably to skip poor first values right after seeding).
   math.random(); math.random()

   local letters = mid(base, math.random(1, 26), 1) .. mid(base, math.random(1, 26), 1)

   -- Upper bounds are 10^8 - 1 and 10^5 - 1 so the zero-padded fields can
   -- never overflow their 8 and 5 digit widths.
   local major = sprintf("%08d", math.random(0, 99999999))
   local minor = sprintf("%05d", math.random(0, 99999))

   return letters .. major .. "-" .. minor
end

-----------------------------------------------------------------------------------------
-- rawalarm (source, severity, subsystem, message
--           [, suppkey, domain, origin, robot, probe, tag1, tag2])
--
-- Builds an "alarm" message and posts it to the hub spooler.
--
--   source, subsystem, message : strings (required)
--   severity                   : number (required)
--   suppkey, domain, origin, robot, probe : optional strings; anything that is
--       not a string is replaced by a "lua-..." placeholder
--   tag1, tag2                 : optional strings, sent as user_tag_1/user_tag_2
--
-- Returns 1 (and nothing else) when a required parameter has the wrong type.
-- Otherwise returns the return code of the spooler request (0 = ok) followed
-- by the nimid generated for the message.
---
function rawalarm(source, severity, subsystem, message, suppkey, domain, origin, robot, probe, tag1, tag2)
   -- expect following parameters
   if type(severity) ~= "number" then return 1 end
   if type(message) ~= "string" then return 1 end
   if type(subsystem) ~= "string" then return 1 end
   if type(source) ~= "string" then return 1 end

   -- set default values for optional parameters.
   if type(domain) ~= "string" then domain = "lua-domain" end
   if type(origin) ~= "string" then origin = "lua-origin" end
   if type(robot) ~= "string" then robot = "lua-robot" end
   if type(probe) ~= "string" then probe = "lua-probe" end
   if type(suppkey) ~= "string" then suppkey = "lua-suppkey" end

   local msg = pds.create()
   local nimid = unique_id()

   -- Create message header
   pds.putString(msg, "nimid", nimid)
   pds.putInt(msg, "nimts", timestamp.now())
   pds.putString(msg, "subject", "alarm")
   pds.putInt(msg, "pri", 1)
   pds.putString(msg, "source", source)
   pds.putString(msg, "origin", origin)
   pds.putString(msg, "domain", domain)
   pds.putString(msg, "robot", robot)
   pds.putString(msg, "prid", probe)
   pds.putString(msg, "suppression", "y+000000000#" .. suppkey)
   pds.putString(msg, "supp_key", suppkey)
   if type(tag1) == "string" then
      pds.putString(msg, "user_tag_1", tag1)
   end
   if type(tag2) == "string" then
      pds.putString(msg, "user_tag_2", tag2)
   end

   -- Add raw alarm data
   local udata = pds.create()
   pds.putInt(udata, "level", severity)
   pds.putString(udata, "subsys", subsystem)
   pds.putString(udata, "message", message)
   pds.putPDS(msg, "udata", udata)

   -- Post the message to the hub-spooler; only the return code is used.
   local _, rc = nimbus.request("spooler", "hubpost", msg)

   -- Release both PDS handles whether or not the post succeeded.
   pds.delete(udata)
   pds.delete(msg)
   return rc, nimid
end

-- Pings one robot and posts the matching clear/critical alarm for it.
local function ping_and_alarm(hub_domain, robot)
   local severity, message, status
   if action.ping(robot.name) == true then
      severity = NIML_CLEAR
      message = "[" .. robot.name .. "] Node up! Ping response good."
      status = "OKAY"
   else
      severity = NIML_CRITICAL
      message = "[" .. robot.name .. "] Node down! Ping response failed."
      status = "NOT OKAY"
   end

   -- The suppression key is per script and per robot, so the clear posted on a
   -- good ping carries the same key as the critical posted on a failed one.
   local rc, id = rawalarm(robot.name, severity, "1.1.1", message,
                           SCRIPT_NAME .. "/" .. robot.name,
                           hub_domain, robot.name, robot.name, "nas",
                           robot.os_user1, robot.os_user2)

   if debug >= 1 then
      printf("%s: %s", robot.name, status)
      printf("rawalarm: %s, %d", tostring(id), rc)
   end
end

-- initialize the random generator
math.randomseed(os.time())

local hubs_reply, hubs_rc = nimbus.request("/" .. my_domain .. "/" .. hub .. "/" .. hub .. "/hub", "gethubs")

-- Same guard as used for the robot list below: a successful return code
-- without a list must not reach pairs().
if hubs_rc == 0 and hubs_reply and hubs_reply.hublist then
   for _, hub_info in pairs(hubs_reply.hublist) do
      if debug >= 1 then
         -- tostring() because the name is only checked for nil afterwards.
         printf("[%s] => %s", tostring(hub_info.domain), tostring(hub_info.name))
      end
      if hub_info.name then
         local hub_addr = "/" .. hub_info.domain .. "/" .. hub_info.name .. "/" .. hub_info.name .. "/hub"
         local robots_reply, robots_rc = nimbus.request(hub_addr, "getrobots")
         if robots_rc == 0 and robots_reply.robotlist then
            for _, robot in pairs(robots_reply.robotlist) do
               if debug >= 2 then
                  for field, value in pairs(robot) do
                     printf("[%s] => %s", tostring(field), tostring(value))
                  end
               end
               if robot.name then
                  ping_and_alarm(hub_info.domain, robot)
               end
            end
         end
      end
   end
end

Outcomes