From c0acf60521eb08aac249bc160ad146ba9f1f9b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Thu, 29 Aug 2019 14:37:43 +0200 Subject: [PATCH] Retry connection on error --- distribute_tasks.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/distribute_tasks.py b/distribute_tasks.py index eeeaca2..523f88c 100644 --- a/distribute_tasks.py +++ b/distribute_tasks.py @@ -4,6 +4,7 @@ import threading import queue import sys import time +import random import yaml import paramiko @@ -67,7 +68,20 @@ class WorkingThread(threading.Thread): def run(self): self.client = paramiko.client.SSHClient() self.client.load_system_host_keys() - self.client.connect(self.host, username=CONFIG["username"]) + for n_try in range(3): + try: + self.client.connect(self.host, username=CONFIG["username"]) + break + except Exception as exn: + delay = 3 + random.random() * 4 + print( + ( + "[{}] Failed to connect. Retry in {} seconds." + + "Exception:\n{}" + ).format(self.host, delay, exn), + file=sys.stderr, + ) + time.sleep(delay) try: while True: @@ -176,7 +190,6 @@ class Orchestrator: def start(self): for thread in self.threads: thread.start() - time.sleep(0.1) for thread in self.threads: while thread.is_alive():