[Pdns-users] CPU consumption of pdns_recursor

Nejedlo, Mark Mark.Nejedlo at tdstelecom.com
Mon Apr 5 17:30:11 UTC 2021


On Monday, April 5, 2021 10:05 AM, Otto Moerbeek Wrote:
> On Mon, Apr 05, 2021 at 02:40:17PM +0000, Nejedlo, Mark via Pdns-users
> wrote:
> 
> > We recently replaced some Bind servers with PowerDNS recursor, and
> > were rather surprised to see CPU usage essentially double for the same
> > workload.  My expectation was that the load would be more or less
> > equivalent between the two resolvers.  It looks like the load is
> > centered in the actual pdns_recursor worker threads, not the distributor
> > threads or dnsdist.  Is it expected that CPU usage would be so much
> > higher under PowerDNS?  Is there a debug parameter that can be set to
> > see where PowerDNS is spending its time?
> >
> > I've already gone through the performance guide and don't see any
> > additional tuning parameters that might help.  We're using LUA, but 1)
> > it's pretty lightweight (a couple hash lookups and modify dq object),
> > and 2) when I disabled the LUA scripting for testing, it didn't make a
> > noticeable difference in the load.
> 
> Impossible to say anything without any info about hardware, os, config
> and metrics.

Hardware is a VMware guest on ESXi, 12 cores, 4GB RAM (no apparent RAM pressure).  OS is Red Hat Enterprise 7.  Running recursor 4.3.5, via the official PowerDNS centos 7 RPM.

Config Files
------------

/etc/pdns-recursor/recursor.conf:
--------------------
setgid=pdns-recursor
setuid=pdns-recursor
version-string=anonymous
threads=8
pdns-distributes-queries=yes
distributor-threads=2
distribution-load-factor=1.25
query-local-address=64.50.242.202
query-local-address6=2600:3404:200:1:250:56ff:feae:df16

use-incoming-edns-subnet=true
xpf-rr-code=65280
xpf-allow-from=127.0.0.1, ::1
edns-subnet-whitelist=tds.net,tdstelecom.com

local-port=5353
local-address=127.0.0.1,::1
forward-zones-file=/etc/pdns-recursor/forward.zones
lua-dns-script=/etc/pdns-recursor/recursor-script.lua
--------------------


/etc/pdns-recursor/recursor-script.lua
--------------------
-- Load the business opt-out list once on Lua startup/reload.
-- Each data line of the file is handed to addMask() on the group created by
-- newNMG(). Lines containing any of the characters G-Z, g-z, '<', '>' or '#'
-- are answerx formatting rather than data and are thrown away.
nxd_business_opt_out = newNMG()
nxd_boo_f = assert(io.open('/etc/pdns-recursor/NXD_BusinessOptOut.txt', 'r'))
for line in nxd_boo_f:lines() do
  -- strip surrounding whitespace (e.g. a stray CR from DOS line endings) so
  -- that blank or padded lines are never passed to addMask()
  local mask = line:match('^%s*(.-)%s*$')
  if mask ~= '' and not string.match(mask, '[G-Zg-z<>#]') then
    nxd_business_opt_out:addMask(mask)
  end
end
-- the list is fully loaded; release the descriptor instead of waiting for GC
nxd_boo_f:close()

-- leading host-part patterns (anchored at the start of the query name):
-- names starting with a "skip" entry are never redirected, names starting
-- with a "match" entry are always redirected
nxdomainhostskip = {'wpad', 'mail', 'ntp', 'voip', 'ftp', 'irc', 'finger'}
nxdomainhostmatch = {'web', 'my', 'ww', 'home', 'http'}

-- current redirect records; replaced wholesale by maintenance()
nxd_data = {}

-- counters for redirects and opt-out hits
nxd_optoutcount = getMetric("nxdomain-opt-out")
nxd_redirectcount = getMetric("nxdomain-redirect")

-- maintenance processes, including:
-- - refreshing the NXDomain hijacking record

-- Convert the whitespace-split tokens of an "NXD ..." reply into a list of
-- redirect records built with newDR().
-- tokens[1] is the 'NXD' marker; the remainder is read as consecutive
-- "<rrtype> <ttl> <ip>" triples, where rrtype must be 'A' or 'AAAA'.
-- Parsing stops at 'NODATA', at the first invalid token (which is logged),
-- or at a truncated trailing triple; records parsed up to that point are
-- still returned.
local function nxd_parse_records(tokens)
  local records = {}
  local i = 2
  while i <= #tokens do
    local rrtype = tokens[i]
    if rrtype == 'NODATA' then
      -- pdnslog('No data available')
      break
    elseif rrtype ~= 'A' and rrtype ~= 'AAAA' then
      pdnslog('Invalid rrtype provided')
      break
    end

    local ttl_token, ip = tokens[i + 1], tokens[i + 2]
    if ttl_token == nil then
      break -- reply ended right after the rrtype
    end
    local ttl = tonumber(ttl_token)
    if ttl == nil then
      pdnslog('Invalid ttl provided')
      break
    end
    if ip == nil then
      break -- reply ended right after the ttl
    end

    local tmpdr = newDR(newDN('redirect'), rrtype, ttl, ip, pdns.place.ANSWER)
    if tmpdr == nil then
      pdnslog("Unable to create DR for "..rrtype..' '..ttl..' '..ip)
      break
    end
    records[#records + 1] = tmpdr
    i = i + 3
  end
  return records
end

-- Periodic hook: asks the local service on [::1]:5354 for the current
-- redirect records ("NXD ANY", up to three attempts with a 1 second receive
-- timeout each) and replaces the global nxd_data with the parsed result.
-- On timeout or an invalid reply nxd_data becomes empty (or holds only the
-- records parsed before the first bad token).
-- Socket setup/send failures are raised as errors, as before, but the
-- socket is now always closed first so no descriptor is left for the GC.
function maintenance()
  local socket = require("socket")
  local udp = assert(socket.udp6())

  -- run the whole exchange protected so the socket can be closed on any exit
  local ok, resp = pcall(function()
    udp:settimeout(1)
    assert(udp:setsockname("*", 0))
    assert(udp:setpeername("::1", 5354))

    local reply
    for _ = 1, 3 do
      assert(udp:send("NXD ANY"))
      reply = udp:receive()
      if reply then
        break
      end
    end
    return reply
  end)
  udp:close()
  if not ok then
    error(resp, 0) -- re-raise the original failure unchanged
  end

  local nxd_tmp = {}
  if resp == nil then
    pdnslog("NXdomain refresh timeout")
  else
    local splitdata = {}
    for token in string.gmatch(resp, "%S+") do
      splitdata[#splitdata + 1] = token
    end
    if splitdata[1] == 'NXD' then
      nxd_tmp = nxd_parse_records(splitdata)
    else
      pdnslog("invalid response "..resp)
    end -- if splitdata[1] == 'NXD'
  end -- if/else resp == nil
  nxd_data = nxd_tmp
end

-- Shared redirect step, kept as its own function so several hooks can use it.
-- Walks the global nxd_data list and, for every record whose type equals the
-- query type (or for every record when the query type is ANY), turns the
-- response into a NOERROR answer carrying that record's type, content and ttl.
-- The redirect counter is bumped when the rcode is NOERROR afterwards.
-- Returns the same dq object that was passed in.
function nxd_handler(dq)
  for _, record in ipairs(nxd_data) do
    local wanted = dq.qtype == record.type or dq.qtype == pdns.ANY
    if wanted then
      -- make it a normal answer
      dq.rcode = pdns.NOERROR
      dq:addAnswer(record.type, record:getContent(), record.ttl)
    end
  end

  local redirected = (dq.rcode == pdns.NOERROR)
  if redirected then
    nxd_redirectcount:inc()
  end

  return dq
end

-- True when `name` starts with any of the Lua patterns in `prefixes`
-- (each pattern is anchored at the beginning of the name).
local function nxd_starts_with_any(name, prefixes)
  for _, hpart in ipairs(prefixes) do
    if string.find(name, "^"..hpart) then
      return true
    end
  end
  return false
end

-- If resolver returns nxdomain, redirect to searchguide service.
-- Returns true when the query went through nxd_handler(), false when the
-- NXDOMAIN answer is left untouched. Checks, in order:
--   1. only A/AAAA/ANY queries are considered
--   2. names starting with an nxdomainhostskip entry are never redirected
--   3. clients matched by nxd_business_opt_out are counted and skipped
--   4. names starting with an nxdomainhostmatch entry are redirected
--   5. otherwise only names with fewer than three labels are redirected
-- DNS names are case-insensitive, so the name is lower-cased once before the
-- host-part checks; the entries in both host-part lists are lower case.
function nxdomain(dq)

  -- Only redirect for A/AAAA/ANY queries
  if dq.qtype ~= pdns.A and dq.qtype ~= pdns.AAAA and dq.qtype ~= pdns.ANY then
    return false
  end

  -- convert the name once instead of once per list entry
  local qname = string.lower(dq.qname:toString())

  -- skip redirect if host part matches the patterns in nxdomainhostskip
  if nxd_starts_with_any(qname, nxdomainhostskip) then
    return false
  end

  -- business customer opt-out
  if nxd_business_opt_out:match(dq.remoteaddr) then
    nxd_optoutcount:inc()
    return false
  end

  -- redirect if the host part matches the patterns in nxdomainhostmatch,
  -- or if the query is two or fewer levels deep
  if nxd_starts_with_any(qname, nxdomainhostmatch) or dq.qname:countLabels() < 3 then
    nxd_handler(dq)
    return true
  end

  -- too long and didn't match host parts, so skip redirect
  return false
end
--------------------


Half-hour stats
--------------------
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 8559042686 questions, 824993 cache entries, 102305 negative entries, 81% cache hits
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: throttle map: 1480, ns speeds: 74541, failed ns: 3038, ednsmap: 198701
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: outpacket/query ratio 28%, 11% throttled, 0 no-delegation drops
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 11031110 outgoing tcp connections, 42 queries running, 67363308 outgoing timeouts
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 508363 packet cache entries, 2% packet cache hits
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 0 has been distributed 1127234640 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 1 has been distributed 1025150455 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 2 has been distributed 1086409376 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 3 has been distributed 956026740 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 4 has been distributed 1117212754 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 5 has been distributed 981577497 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 6 has been distributed 878916852 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 7 has been distributed 1378105489 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 4290 qps (average over 1800 seconds)
--------------------


rec_control get-all
--------------------
all-outqueries	2426135931
answers-slow	32622243
answers0-1	6963714100
answers1-10	616485007
answers10-100	589467848
answers100-1000	111976956
auth-zone-queries	3913588
auth4-answers-slow	54645139
auth4-answers0-1	502461956
auth4-answers1-10	778877222
auth4-answers10-100	615451605
auth4-answers100-1000	109890924
auth6-answers-slow	9558349
auth6-answers0-1	42211276
auth6-answers1-10	195413588
auth6-answers10-100	100814046
auth6-answers100-1000	13552926
cache-entries	825029
cache-hits	6781159775
cache-misses	1533106380
case-mismatches	0
chain-resends	174881587
client-parse-errors	0
concurrent-queries	75
cpu-msec-thread-0	91219643
cpu-msec-thread-1	142746545
cpu-msec-thread-2	278497218
cpu-msec-thread-3	250309239
cpu-msec-thread-4	250701322
cpu-msec-thread-5	319416792
cpu-msec-thread-6	328608768
cpu-msec-thread-7	264910279
cpu-msec-thread-8	322031125
cpu-msec-thread-9	335541569
dlg-only-drops	0
dnssec-authentic-data-queries	18646074
dnssec-check-disabled-queries	1113241
dnssec-queries	53214851
dnssec-result-bogus	0
dnssec-result-indeterminate	0
dnssec-result-insecure	0
dnssec-result-nta	0
dnssec-result-secure	10986
dnssec-validations	10986
dont-outqueries	45917207
ecs-queries	1451717
ecs-responses	32451
edns-ping-matches	0
edns-ping-mismatches	0
empty-queries	0
failed-host-entries	1356
fd-usage	269
ignored-packets	1478626
ipv6-outqueries	359162038
ipv6-questions	8551805269
malloc-bytes	0
max-cache-entries	1000000
max-mthread-stack	87984
max-packetcache-entries	500000
negcache-entries	102533
no-packet-error	1601797538
noedns-outqueries	3772686
noerror-answers	7669982655
noping-outqueries	0
nsset-invalidations	2704139
nsspeeds-entries	70484
nxdomain-answers	744158682
nxdomain-opt-out	2407297
nxdomain-redirect	93573041
outgoing-timeouts	67362296
outgoing4-timeouts	56885531
outgoing6-timeouts	10476765
over-capacity-drops	13007
packetcache-entries	510580
packetcache-hits	244194152
packetcache-misses	8307487740
policy-drops	0
policy-result-custom	0
policy-result-drop	0
policy-result-noaction	8314084804
policy-result-nodata	0
policy-result-nxdomain	0
policy-result-truncate	0
qa-latency	9496
qname-min-fallback-success	406051209
query-pipe-full-drops	0
questions	8558845589
real-memory-usage	4266360832
rebalanced-queries	1692943695
resource-limits	0
security-status	1
server-parse-errors	91767
servfail-answers	144318969
spoof-prevents	1
sys-msec	539748918
tcp-client-overflow	0
tcp-clients	4
tcp-outqueries	11030988
tcp-questions	7040320
throttle-entries	1356
throttled-out	306121689
throttled-outqueries	306121689
too-old-drops	107804
truncated-drops	0
udp-in-errors	80342
udp-noport-errors	711240
udp-recvbuf-errors	0
udp-sndbuf-errors	0
unauthorized-tcp	0
unauthorized-udp	0
unexpected-packets	46639
unreachables	206095
uptime	6103754
user-msec	2044729297
variable-responses	18555
x-our-latency	59
x-ourtime-slow	2003931
x-ourtime0-1	8265444257
x-ourtime1-2	37568531
x-ourtime16-32	33205
x-ourtime2-4	7265716
x-ourtime4-8	1653903
x-ourtime8-16	296611
--------------------


More information about the Pdns-users mailing list