1313import backoff
1414import os
1515
class PeerDiscoveryException(Exception):
    """Signals that peer discovery has not yet produced the expected result."""
18+
def construct_service_record():
    """Return the SRV record name to resolve for peer discovery.

    A non-empty ``SRV_RECORD`` environment variable takes precedence.
    Otherwise the name is derived from this host's fully qualified domain
    name.
    """
    override = os.getenv('SRV_RECORD')
    if override:
        return override
    # Drop our Pod's unique identity (the first FQDN label) and replace it
    # with '_couchdb._tcp'
    domain_labels = socket.getfqdn().split('.')[1:]
    return '.'.join(['_couchdb', '_tcp'] + domain_labels)
@@ -22,29 +25,31 @@ def construct_service_record():
2225 dns .resolver .NXDOMAIN ,
2326 max_tries = 10
2427)
@backoff.on_exception(
    backoff.expo,
    PeerDiscoveryException,
    max_tries=10
)
def discover_peers(service_record):
    """Resolve ``service_record`` and return the discovered peer hostnames.

    When the ``COUCHDB_CLUSTER_SIZE`` environment variable is set, the number
    of SRV answers must match it; otherwise ``PeerDiscoveryException`` is
    raised so that the ``backoff`` decorator retries the lookup (exponential
    wait, up to 10 tries). When the variable is unset or empty, whatever the
    first successful lookup returns is accepted as-is.

    Args:
        service_record: SRV record name to query.

    Returns:
        List of peer hostnames with the trailing period removed.

    Raises:
        PeerDiscoveryException: the number of peers found differs from
            ``COUCHDB_CLUSTER_SIZE``.
        ValueError: ``COUCHDB_CLUSTER_SIZE`` is set but is not an integer.
    """
    # os.getenv returns None when the variable is unset, and int(None) raises
    # TypeError -- convert only when a value is actually present so the
    # "not set" path below stays reachable.
    raw_cluster_size = os.getenv('COUCHDB_CLUSTER_SIZE')
    expected_peers_count = int(raw_cluster_size) if raw_cluster_size else 0
    if expected_peers_count:
        print('Expecting', expected_peers_count, 'peers...')
    else:
        print('Looks like COUCHDB_CLUSTER_SIZE is not set, will not wait for DNS...')
    print('Resolving SRV record:', service_record)
    # Erlang requires that we drop the trailing period from the absolute DNS
    # name to form the hostname used for the Erlang node. This feels hacky
    # but not sure of a more official answer
    answers = dns.resolver.query(service_record, 'SRV')
    peers = [rdata.target.to_text()[:-1] for rdata in answers]
    peers_count = len(peers)
    if expected_peers_count:
        print('Discovered', peers_count, 'of', expected_peers_count, 'peers:', peers)
        if peers_count != expected_peers_count:
            print('Waiting for cluster DNS to fully propagate...')
            # Raising hands control back to the backoff decorator, which
            # re-invokes this function after an exponential delay.
            raise PeerDiscoveryException(
                'Discovered %d of %d expected peers' % (peers_count, expected_peers_count)
            )
    else:
        print('Discovered', peers_count, 'peers:', peers)
    return peers
4954
5055@backoff .on_exception (
0 commit comments