[Pdns-users] CPU consumption of pdns_recursor
Nejedlo, Mark
Mark.Nejedlo at tdstelecom.com
Mon Apr 5 17:30:11 UTC 2021
On Monday, April 5, 2021 10:05 AM, Otto Moerbeek Wrote:
> On Mon, Apr 05, 2021 at 02:40:17PM +0000, Nejedlo, Mark via Pdns-users
> wrote:
>
> > We recently replaced some Bind servers with PowerDNS recursor, and
> > were rather surprised to see CPU usage essentially double for the same
> > workload. My expectation was that the load would be more or less
> > equivalent between the two resolvers. It looks like the load is
> > centered in the actual pdns_recursor worker threads, not the distributor
> > threads or dnsdist. Is it expected that CPU usage would be so much
> > higher under PowerDNS? Is there a debug parameter that can be set to
> > see where PowerDNS is spending its time?
> >
> > I've already gone through the performance guide and don't see any
> > additional tuning parameters that might help. We're using LUA, but 1)
> > it's pretty lightweight (a couple hash lookups and modify dq object),
> > and 2) when I disabled the LUA scripting for testing, it didn't make a
> > noticeable difference in the load.
>
> Impossible to say anything without any info about hardware, os, config
> and metrics.
Hardware is a VMware guest on ESXi, 12 cores, 4GB RAM (no apparent RAM pressure). OS is Red Hat Enterprise 7. Running recursor 4.3.5, via the official PowerDNS centos 7 RPM.
Config Files
------------
/etc/pdns-recursor/recursor.conf:
--------------------
setgid=pdns-recursor
setuid=pdns-recursor
version-string=anonymous
threads=8
pdns-distributes-queries=yes
distributor-threads=2
distribution-load-factor=1.25
query-local-address=64.50.242.202
query-local-address6=2600:3404:200:1:250:56ff:feae:df16
use-incoming-edns-subnet=true
xpf-rr-code=65280
xpf-allow-from=127.0.0.1, ::1
edns-subnet-whitelist=tds.net,tdstelecom.com
local-port=5353
local-address=127.0.0.1,::1
forward-zones-file=/etc/pdns-recursor/forward.zones
lua-dns-script=/etc/pdns-recursor/recursor-script.lua
--------------------
/etc/pdns-recursor/recursor-script.lua
--------------------
-- Load the business opt-out list once on Lua startup/reload.
-- Every usable line of the file is added as a mask to the netmask group that
-- nxdomain() later consults through nxd_business_opt_out:match().
nxd_business_opt_out = newNMG()
nxd_boo_f = assert(io.open('/etc/pdns-recursor/NXD_BusinessOptOut.txt', 'r'))
for line in nxd_boo_f:lines() do
  -- Throw away non-data lines associated with answerx formatting (anything
  -- containing G-Z, g-z, '<', '>' or '#'), and skip blank lines, which carry
  -- no mask to add.
  if string.match(line, '%S') and not string.match(line, '[G-Zg-z<>#]') then
    nxd_business_opt_out:addMask(line)
  end
end
-- The list is fully loaded: release the file handle now instead of leaving
-- it open until the garbage collector gets around to it.
nxd_boo_f:close()

-- Leading host-name patterns for which nxdomain() never redirects.
nxdomainhostskip = {'wpad', 'mail', 'ntp', 'voip', 'ftp', 'irc', 'finger'}
-- Leading host-name patterns for which nxdomain() always redirects.
nxdomainhostmatch = {'web', 'my', 'ww', 'home', 'http'}
-- Current redirect records; replaced wholesale by maintenance().
nxd_data = {}

-- counters for redirects and opt-out hits
nxd_optoutcount = getMetric("nxdomain-opt-out")
nxd_redirectcount = getMetric("nxdomain-redirect")
-- Ask the NXDomain data service on UDP [::1]:5354 for the current redirect
-- records by sending "NXD ANY", retrying up to three times with a one second
-- receive timeout per attempt.
-- Returns the raw response string, or nil when every attempt timed out.
-- Socket setup/send failures are raised exactly as before, but the socket is
-- now always closed first so repeated maintenance runs do not pile up
-- descriptors.
local function nxd_fetch_response()
  local socket = require("socket")
  local udp = assert(socket.udp6())
  local ok, result = pcall(function()
    udp:settimeout(1)
    assert(udp:setsockname("*", 0))
    assert(udp:setpeername("::1", 5354))
    local resp
    for _ = 1, 3 do
      assert(udp:send("NXD ANY"))
      resp = udp:receive()
      if resp then
        break
      end
    end
    return resp
  end)
  udp:close()
  if not ok then
    -- level 0: re-raise the original error value without adding position info
    error(result, 0)
  end
  return result
end

-- Turn a service response into a list of redirect records.
-- The response is whitespace separated: the literal "NXD", then either
-- "NODATA" or repeated "<A|AAAA> <ttl> <ip>" triples.
-- Parsing stops at the first problem; records parsed before it are kept.
-- Returns a (possibly empty) table of records built with newDR().
local function nxd_parse_response(resp)
  local records = {}
  local fields = {}
  for field in string.gmatch(resp, "%S+") do
    fields[#fields + 1] = field
  end

  if fields[1] ~= 'NXD' then
    pdnslog("invalid response "..resp)
    return records
  end

  local pos = 2
  while pos <= #fields do
    local rrtype = fields[pos]
    if rrtype == 'NODATA' then
      -- service has nothing to hand out; not an error
      break
    elseif rrtype ~= 'A' and rrtype ~= 'AAAA' then
      pdnslog('Invalid rrtype provided')
      break
    end

    local ttl_field, ip = fields[pos + 1], fields[pos + 2]
    if ttl_field == nil then
      pdnslog('Truncated record in NXD response')
      break
    end
    local ttl = tonumber(ttl_field)
    if ttl == nil then
      pdnslog('Invalid ttl provided')
      break
    end
    if ip == nil then
      pdnslog('Truncated record in NXD response')
      break
    end

    local tmpdr = newDR(newDN('redirect'), rrtype, ttl, ip, pdns.place.ANSWER)
    if tmpdr == nil then
      pdnslog("Unable to create DR for "..rrtype..' '..ttl..' '..ip)
      break
    end
    records[#records + 1] = tmpdr
    pos = pos + 3
  end
  return records
end

-- maintenance processes, including:
-- - refreshing the NXDomain hijacking record
-- nxd_data is replaced on every completed run: with the freshly parsed
-- records, or with an empty table when the service timed out or answered
-- with something unusable. If the exchange raises, nxd_data is left as is.
function maintenance()
  local resp = nxd_fetch_response()
  if resp == nil then
    pdnslog("NXdomain refresh timeout")
    nxd_data = {}
  else
    nxd_data = nxd_parse_response(resp)
  end
end
-- Redirect processing lives in its own function so it can be called from
-- multiple places.
-- For every record in nxd_data whose type equals the query type (or for all
-- of them when the query type is ANY), the rcode is switched to NOERROR and
-- the record is added as an answer. The redirect counter is bumped once if
-- the rcode is NOERROR afterwards.
-- Returns the dq it was given.
function nxd_handler(dq)
  for _, record in ipairs(nxd_data) do
    local wanted = dq.qtype == record.type or dq.qtype == pdns.ANY
    if wanted then
      -- make it a normal answer
      dq.rcode = pdns.NOERROR
      dq:addAnswer(record.type, record:getContent(), record.ttl)
    end
  end

  local redirected = dq.rcode == pdns.NOERROR
  if redirected then
    nxd_redirectcount:inc()
  end
  return dq
end
-- If resolver returns nxdomain, redirect to searchguide service.
-- Returns true when the query was handed to nxd_handler(), false when it is
-- left alone.
-- The query name is rendered to a string once and lower-cased before the
-- host-part checks: DNS names compare case-insensitively, so "WWW.example."
-- has to be treated like "www.example.".
function nxdomain(dq)
  -- Only redirect for A/AAAA/ANY queries
  if dq.qtype ~= pdns.A and dq.qtype ~= pdns.AAAA and dq.qtype ~= pdns.ANY then
    return false
  end

  local qname = string.lower(dq.qname:toString())

  -- skip redirect if host part matches the patterns in nxdomainhostskip
  for _, hpart in ipairs(nxdomainhostskip) do
    if string.match(qname, "^"..hpart) then
      return false
    end
  end

  -- business customer opt-out
  if nxd_business_opt_out:match(dq.remoteaddr) then
    nxd_optoutcount:inc()
    return false
  end

  -- check if host part matches patterns. if so, redirect to search service
  for _, hpart in ipairs(nxdomainhostmatch) do
    if string.match(qname, "^"..hpart) then
      nxd_handler(dq)
      return true
    end
  end

  -- only redirect queries two or fewer levels deep
  if dq.qname:countLabels() < 3 then
    nxd_handler(dq)
    return true
  end

  -- too long and didn't match host parts, so skip redirect
  return false
end
--------------------
Half-hour stats
--------------------
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 8559042686 questions, 824993 cache entries, 102305 negative entries, 81% cache hits
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: throttle map: 1480, ns speeds: 74541, failed ns: 3038, ednsmap: 198701
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: outpacket/query ratio 28%, 11% throttled, 0 no-delegation drops
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 11031110 outgoing tcp connections, 42 queries running, 67363308 outgoing timeouts
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 508363 packet cache entries, 2% packet cache hits
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 0 has been distributed 1127234640 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 1 has been distributed 1025150455 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 2 has been distributed 1086409376 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 3 has been distributed 956026740 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 4 has been distributed 1117212754 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 5 has been distributed 981577497 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 6 has been distributed 878916852 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: thread 7 has been distributed 1378105489 queries
Apr 05 16:35:34 ny00vmp-rdns0.svc.tds.net pdns_recursor[1545]: stats: 4290 qps (average over 1800 seconds)
--------------------
rec_control get-all
--------------------
all-outqueries 2426135931
answers-slow 32622243
answers0-1 6963714100
answers1-10 616485007
answers10-100 589467848
answers100-1000 111976956
auth-zone-queries 3913588
auth4-answers-slow 54645139
auth4-answers0-1 502461956
auth4-answers1-10 778877222
auth4-answers10-100 615451605
auth4-answers100-1000 109890924
auth6-answers-slow 9558349
auth6-answers0-1 42211276
auth6-answers1-10 195413588
auth6-answers10-100 100814046
auth6-answers100-1000 13552926
cache-entries 825029
cache-hits 6781159775
cache-misses 1533106380
case-mismatches 0
chain-resends 174881587
client-parse-errors 0
concurrent-queries 75
cpu-msec-thread-0 91219643
cpu-msec-thread-1 142746545
cpu-msec-thread-2 278497218
cpu-msec-thread-3 250309239
cpu-msec-thread-4 250701322
cpu-msec-thread-5 319416792
cpu-msec-thread-6 328608768
cpu-msec-thread-7 264910279
cpu-msec-thread-8 322031125
cpu-msec-thread-9 335541569
dlg-only-drops 0
dnssec-authentic-data-queries 18646074
dnssec-check-disabled-queries 1113241
dnssec-queries 53214851
dnssec-result-bogus 0
dnssec-result-indeterminate 0
dnssec-result-insecure 0
dnssec-result-nta 0
dnssec-result-secure 10986
dnssec-validations 10986
dont-outqueries 45917207
ecs-queries 1451717
ecs-responses 32451
edns-ping-matches 0
edns-ping-mismatches 0
empty-queries 0
failed-host-entries 1356
fd-usage 269
ignored-packets 1478626
ipv6-outqueries 359162038
ipv6-questions 8551805269
malloc-bytes 0
max-cache-entries 1000000
max-mthread-stack 87984
max-packetcache-entries 500000
negcache-entries 102533
no-packet-error 1601797538
noedns-outqueries 3772686
noerror-answers 7669982655
noping-outqueries 0
nsset-invalidations 2704139
nsspeeds-entries 70484
nxdomain-answers 744158682
nxdomain-opt-out 2407297
nxdomain-redirect 93573041
outgoing-timeouts 67362296
outgoing4-timeouts 56885531
outgoing6-timeouts 10476765
over-capacity-drops 13007
packetcache-entries 510580
packetcache-hits 244194152
packetcache-misses 8307487740
policy-drops 0
policy-result-custom 0
policy-result-drop 0
policy-result-noaction 8314084804
policy-result-nodata 0
policy-result-nxdomain 0
policy-result-truncate 0
qa-latency 9496
qname-min-fallback-success 406051209
query-pipe-full-drops 0
questions 8558845589
real-memory-usage 4266360832
rebalanced-queries 1692943695
resource-limits 0
security-status 1
server-parse-errors 91767
servfail-answers 144318969
spoof-prevents 1
sys-msec 539748918
tcp-client-overflow 0
tcp-clients 4
tcp-outqueries 11030988
tcp-questions 7040320
throttle-entries 1356
throttled-out 306121689
throttled-outqueries 306121689
too-old-drops 107804
truncated-drops 0
udp-in-errors 80342
udp-noport-errors 711240
udp-recvbuf-errors 0
udp-sndbuf-errors 0
unauthorized-tcp 0
unauthorized-udp 0
unexpected-packets 46639
unreachables 206095
uptime 6103754
user-msec 2044729297
variable-responses 18555
x-our-latency 59
x-ourtime-slow 2003931
x-ourtime0-1 8265444257
x-ourtime1-2 37568531
x-ourtime16-32 33205
x-ourtime2-4 7265716
x-ourtime4-8 1653903
x-ourtime8-16 296611
--------------------
More information about the Pdns-users
mailing list