From 55cab19e33be1aa762cf04d3362fd69b85585554 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?=
Date: Thu, 29 Aug 2019 14:46:02 +0200
Subject: [PATCH] Connect with address (not host)

---
 distribute_tasks.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/distribute_tasks.py b/distribute_tasks.py
index 523f88c..4401350 100644
--- a/distribute_tasks.py
+++ b/distribute_tasks.py
@@ -4,6 +4,7 @@ import threading
 import queue
 import sys
 import time
+import socket
 import random
 import yaml
 import paramiko
@@ -58,8 +59,9 @@ class Task:
 class WorkingThread(threading.Thread):
     """ A thread actually getting work done on a given machine """
 
-    def __init__(self, host, workqueue, failures):
+    def __init__(self, host, addr, workqueue, failures):
         self.host = host
+        self.addr = addr
         self.client = None
         self.workqueue = workqueue
         self.failures = failures
@@ -70,18 +72,24 @@ class WorkingThread(threading.Thread):
         self.client.load_system_host_keys()
         for n_try in range(3):
             try:
-                self.client.connect(self.host, username=CONFIG["username"])
+                self.client.connect(self.addr, username=CONFIG["username"])
                 break
             except Exception as exn:
                 delay = 3 + random.random() * 4
                 print(
                     (
-                        "[{}] Failed to connect. Retry in {} seconds."
+                        "[{}] Failed to connect. Retry in {:.02f} seconds."
                         + "Exception:\n{}"
                     ).format(self.host, delay, exn),
                     file=sys.stderr,
                 )
                 time.sleep(delay)
+        else:
+            print(
+                "[{}] Failed to connect, stopping thread.".format(self.host),
+                file=sys.stderr,
+            )
+            return
 
         try:
             while True:
@@ -121,7 +129,10 @@ class HostsFile:
                         "Host {} has no {}".format(entry["host"], field)
                     )
                     raise Exception("Host has no {}".format(field))
-            self.hosts[entry["host"]] = {"cores": entry["cores"]}
+            self.hosts[entry["host"]] = {
+                "cores": entry["cores"],
+                "ip": socket.gethostbyname(entry["host"]),
+            }
 
 
 class TasksFile: