#!/usr/bin/env python

"""
Copyright 2000-2022 Citrix Systems, Inc. All rights reserved.
This software and documentation contain valuable trade secrets
and proprietary property belonging to Citrix Systems, Inc.
None of this software and documentation may be copied,
duplicated or disclosed without the express written permission
of Citrix Systems, Inc.
"""

import sys
import logging
import time
import json
import os
import psutil
import signal
import traceback
import logging.handlers
import fcntl
import hashlib
import copy

# File that both the root logger configuration and the rotating handler below write to.
LogFileName = '/var/log/cloudadapter.log'
# Size threshold, in bytes, handed to the rotating handler (100 KB).
MaxBytes = 100 * 1024
# Number of rotated files the rotating handler keeps.
LogBackupCount = 25
# Module logger, named after the invoking script path (sys.argv[0]).
log = logging.getLogger(sys.argv[0])
# Root logger: INFO level, written to LogFileName.
# NOTE(review): '%s' in datefmt is not a standard strftime directive and is
# platform-dependent -- confirm which field was intended.
logging.basicConfig( format='%(asctime)s %(name)12s %(levelname)s %(message)s', datefmt='%m-%d %H:%M:%S:%s',filename=LogFileName,level=logging.INFO)
# Rotate log for every 100 KB
# delay=True postpones opening the file until the first record is emitted.
LogMessageHandler = logging.handlers.RotatingFileHandler(LogFileName, maxBytes=MaxBytes, backupCount=LogBackupCount, delay=True)
# Same layout as the root format above, but without the custom datefmt.
LogMessageHandler.setFormatter(logging.Formatter('%(asctime)s %(name)12s %(levelname)s %(message)s'))
log.addHandler(LogMessageHandler)
# Only CRITICAL records from the "requests" and "urllib3" loggers are let through.
logging.getLogger("requests").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
# Records logged through `log` go to LogMessageHandler only; they are not
# passed on to the root logger's handler.
log.propagate = False

# Extend the import path with a relative directory, so it resolves against the
# process's current working directory.
# NOTE(review): presumably needed so rainman_core can load its drivers -- confirm.
sys.path.append("rainman_core/drivers")
from rainman_core.common import rain
# Service handles shared by the rest of this module:
#   cloud - cloud config service, local - local config service,
#   ca    - cloudadapter service built on top of `cloud`.
config = rain.rainman_config()
cloud = config.get_cloud_config_service()
local = config.get_local_config_service()
ca = config.get_cloudadapter_service(cloud)

###cloudadapter globals#####
# poll_time, iteration_interval and default_drain_time can be overridden at
# runtime through the SIGUSR2 debug file (see the set_* functions below).
# Passed to time.sleep() before each create_instance attempt.
poll_time = 20
# Passed to time.sleep() between status polls.
iteration_interval = 5
# Not referenced in this part of the file.
supported_platforms = ['AWS']
# Used as the group drain_time and as the sleep after draining a faulty server.
default_drain_time = 120

def handle_signal(signum, frame):
    """
        Signal handler for SIGUSR1, SIGUSR2 and SIGTERM.

        SIGUSR1 : exit with status 11.
        SIGUSR2 : reload log level, iteration interval, poll time and drain
                  time from /nsconfig/.cloudadapter-debug (JSON). If the file
                  cannot be read or parsed the error is logged and nothing
                  is changed.
        SIGTERM : exit with status 11 only when the ADC reports this node as
                  'Secondary'; otherwise (or if the state cannot be fetched)
                  the signal is ignored.
        Any other signal is logged and ignored.
    """
    log.info("Received signal")

    if signum == signal.SIGUSR1:
        log.info("No longer primary/CCO node, exiting")
        sys.exit(11)

    if signum == signal.SIGUSR2:
        log.info("SIGUSR2 received")
        try:
            # The with-block closes the file; no explicit close() is needed.
            with open('/nsconfig/.cloudadapter-debug') as file_obj:
                data = json.load(file_obj)
        except Exception as e:
            log.error(e)
            return
        set_log_level(data)
        set_iteration_interval(data)
        set_poll_time(data)
        set_default_drain_time(data)
        return

    if signum != signal.SIGTERM:
        return

    try:
        ha_state = local.get_ha_node_state()
    except Exception as e:
        # Log the exception object itself: exceptions have no .message
        # attribute on Python 3, and reading it would raise from the handler.
        log.exception(e)
        log.info("Failed to get ha node state from ADC")
        return

    if ha_state == 'Secondary':
        log.info("SIGTERM signal received, No longer primary, Exiting...")
        sys.exit(11)
    return

def set_default_drain_time(data):
    """
        Replace the module-level default_drain_time with data['drain_time']
        when that key is present, then log the value now in effect.
        Exceptions are logged with their traceback and not propagated.
    """
    global default_drain_time
    try:
        has_override = 'drain_time' in data
        if has_override:
            default_drain_time = data['drain_time']
        current = str(default_drain_time)
        log.info("drain_time is %s"%current)
    except Exception:
        log.error(traceback.format_exc())
    return

def set_poll_time(data):
    """
        Replace the module-level poll_time with data['poll_time'] when that
        key is present, then log the value now in effect.
        Exceptions are logged with their traceback and not propagated.
    """
    global poll_time
    try:
        has_override = 'poll_time' in data
        if has_override:
            poll_time = data['poll_time']
        current = str(poll_time)
        log.info("poll_time is %s"%(current))
    except Exception:
        log.error(traceback.format_exc())
    return

def set_iteration_interval(data):
    """
        Replace the module-level iteration_interval with
        data['iteration_interval'] when that key is present, then log the
        value now in effect.
        Exceptions are logged with their traceback and not propagated.
    """
    global iteration_interval
    try:
        has_override = 'iteration_interval' in data
        if has_override:
            iteration_interval = data['iteration_interval']
        current = str(iteration_interval)
        log.info("iteration_interval is %s"%(current))
    except Exception:
        log.error(traceback.format_exc())
    return

def set_log_level(data):
    """
        Apply data['log_level'] to the module logger when it is one of
        DEBUG, INFO, WARNING, ERROR or CRITICAL.
        A missing key or an unrecognised value leaves the level untouched.
        Exceptions are logged with their traceback and not propagated.
    """
    try:
        if 'log_level' not in data:
            return
        requested = data['log_level']
        # (name accepted in the debug file, logging level it maps to)
        known_levels = (
            ("DEBUG", logging.DEBUG),
            ("INFO", logging.INFO),
            ("WARNING", logging.WARNING),
            ("ERROR", logging.ERROR),
            ("CRITICAL", logging.CRITICAL),
        )
        for level_name, level_value in known_levels:
            if requested == level_name:
                log.info("Setting the log Level")
                log.setLevel(level_value)
                break
        return
    except Exception:
        log.error(traceback.format_exc())
        return

class aws_cloud_manager():
    """
        This class handles all communications with the cloud
    """
    def __init__(self):
        """
            Wait until the required IAM permissions are present, then record
            this instance's id, groups and availability zone and build the
            sqs/sns/alarm/rule helper objects.

            Exits the process with status 11 when the own availability zone
            cannot be fetched or when any step raises.
        """
        try:
            # Re-check every 10 minutes until no IAM permission is missing.
            while True:
                ret = cloud.clouadapter_iam_perrmissions()
                if ret == -1 or cloud.missing_iam:
                    log.info("iam permissions missing. Sleeping for 10mins")
                    time.sleep(600)
                    continue
                log.info("all required IAM is present")
                break

            self.instanceid = cloud.get_own_instanceid()
            self.groups = cloud.get_group_info()
            self.availZone = ca.get_own_az(self.instanceid)
            self.sqs = sqs_queue(self.instanceid)
            self.sns = sns_topic()
            self.alarm = alarm()
            self.rule = Rule()
            self.receiptHandle = None
            self.original_max_capacity = 0
            self.cl_created_new_instance = None
            self.n_plus_1_tag = "CitrixADC:faultyServer:capacity_adjustment_instance"
            self.enable_server_tag = "CitrixADC:faultyServer:enable_faulty_server"

            if not self.availZone:
                log.info("failed to fetch own AZ, exiting.")
                sys.exit(11)
        except Exception:
            # SystemExit is not an Exception, so the exit above passes through.
            log.info("aws_cloud_manager init failed. Exiting")
            # Log the traceback so the cause of the failure is visible.
            log.info(traceback.format_exc())
            sys.exit(11)

    def init_from_config_file(self, config):
        """
            Load the saved entity identifiers from the config dictionary into
            the sns, sqs, alarm and rule helper objects.

            Returns True when every expected key was present, False on the
            first missing key (values assigned before it stay assigned).
        """
        # (helper attribute on self, field on that helper, key in config),
        # applied strictly in this order.
        field_map = (
            ('sns', 'alarm_sns_arn', 'alarm_sns_arn'),
            ('sns', 'rule_sns_arn', 'rule_sns_arn'),
            ('sns', 'alarm_topic_name', 'alarm_topic_name'),
            ('sns', 'rule_topic_name', 'rule_topic_name'),
            ('sqs', 'queue_url', 'sqs_url'),
            ('sqs', 'queue_arn', 'sqs_arn'),
            ('sqs', 'queue_name', 'queue_name'),
            ('sqs', 'rule_subscription_arn', 'rule_subscription_arn'),
            ('alarm', 'rain_subscription_arn', 'rain_subscription_arn'),
            ('rule', 'rule_name', 'rule_name'),
        )
        try:
            for holder, field, key in field_map:
                value = config[key]
                setattr(getattr(self, holder), field, value)
            log.info("init_from_config_file success")
        except KeyError:
            log.info("KeyError in init_from_config_file")
            log.info(traceback.format_exc())
            return False
        return True

    def write_to_cl_conf(self, conf_file):
        config = {}
        infile = None
        try:
            outfile = open(conf_file, 'w')
            config['alarm_sns_arn'] = self.sns.alarm_sns_arn
            config['alarm_topic_name'] = self.sns.alarm_topic_name
            config['rule_topic_name'] = self.sns.rule_topic_name
            config['sqs_url'] = self.sqs.queue_url
            config['sqs_arn'] = self.sqs.queue_arn
            config['queue_name'] = self.sqs.queue_name
            config['rain_subscription_arn'] = self.alarm.rain_subscription_arn
            config['rule_subscription_arn'] = self.sqs.rule_subscription_arn
            config['rule_sns_arn'] = self.sns.rule_sns_arn
            config['rule_name'] = self.rule.rule_name
            pretty_config = json.dumps(config, sort_keys=True, indent=8, separators=(',', ': '))
            outfile.write(pretty_config)
            outfile.close()
            return True
        except ValueError as e:
            log.error("Configuration is not valid: %s", e)
            log.error(traceback.format_exc())
            if outfile:
                outfile.close()
            return False
        except IOError as e:
            log.error("Unable to open or lock config file: %s", e)
            log.error(traceback.format_exc())
            if outfile:
                outfile.close()
            return False
        except Exception:
            log.error("exception in write_to_cl_conf")
            log.error(traceback.format_exc())
            if outfile:
                outfile.close()
            return False

    def is_my_az(self, az):
        """
            Return True when az equals this instance's availability zone,
            both compared as strings; otherwise False.
        """
        # Lazy %-style arguments: concatenating the raw values would raise
        # TypeError for a non-string az, although the comparison accepts it.
        log.debug("comparing AZ: %s and %s", self.availZone, az)
        return str(self.availZone) == str(az)

    def prepare_setup(self):
        """
            Create and connect the cloud entities, in this order: rule SQS
            queue, SNS topics, queue attributes for the rule topic, SQS
            subscription to the rule topic, rule, alarm, and finally the
            alarm-topic wiring done by make_alarm_configs().

            Returns True on success. If the alarm cannot be created the
            remaining wiring is skipped and True is still returned.
            When any other step fails, delete_cl_created_entities() is run
            and False is returned.
        """
        if not self.sqs.setup():
            log.info("cleaning up entities before dying!")
            self.delete_cl_created_entities()
            return False

        if not self.sns.setup():
            log.info("failed to  create sns")
            self.delete_cl_created_entities()
            return False

        #phase-2 : fetch sns_arn_list dynamically
        sns_arn_list = [self.sns.rule_sns_arn]
        if not ca.set_sqs_queue_attributes(self.sqs.queue_url,
                                           sns_arn_list,
                                           self.sqs.queue_arn):
            log.info("failed to set attributes to rule SQS queue")
            self.delete_cl_created_entities()
            return False

        self.sqs.rule_subscription_arn = ca.subscribe_sqs_to_sns_topic(
                                        self.sns.rule_sns_arn,
                                        self.sqs.queue_arn)
        if not self.sqs.rule_subscription_arn:
            log.info("SQS subscription to RULE SNS topic failed")
            self.delete_cl_created_entities()
            return False

        if not self.rule.setup(self.sns.rule_sns_arn):
            log.info("Rule failed")
            self.delete_cl_created_entities()
            return False

        if not self.alarm.setup(self.sns.alarm_sns_arn):
            log.info("Failed to create alarm.We will try again when faulty msg arrives.")
            return True

        # make_alarm_configs() performs the same alarm wiring and logs the
        # same failure messages; here a failure additionally triggers cleanup.
        if not self.make_alarm_configs():
            self.delete_cl_created_entities()
            return False
        return True

    def validate_init(self):
        """
            Check that the entities recorded on this object can still be
            fetched: rule SQS queue, rule and alarm SNS topics, rule
            subscription, rule, and the alarm subscription.

            Returns False as soon as one lookup fails. A missing alarm is
            tolerated: True is returned without checking its subscription.
        """
        current_url = ca.get_sqs_queue(self.sqs.queue_name)
        if not (current_url and current_url == self.sqs.queue_url):
            log.info("validate_init : no sqs")
            return False

        if not ca.get_sns_topic_attributes(self.sns.rule_sns_arn):
            log.info("validate_init : no RULE sns")
            return False

        if not ca.get_sns_topic_attributes(self.sns.alarm_sns_arn):
            log.info("validate_init : no ALARM sns")
            return False

        if not ca.fetch_subscription_attributes(self.sqs.rule_subscription_arn):
            log.info("validate_init : no RULE subscription")
            return False

        if not ca.get_rule(self.rule.rule_name):
            log.info("validate_init : no rule")
            return False

        alarm_present = self.alarm.does_alarm_param_exist('AlarmActions', [self.sns.alarm_sns_arn])
        if not alarm_present:
            log.info("validate_init : no alarm. Try creating when Faulty Msg arrives")
            return True

        if not ca.fetch_subscription_attributes(self.alarm.rain_subscription_arn):
            log.info("validate_init : no ALARM subscription")
            return False

        log.info("validate_init success")
        return True

    def make_alarm_configs(self):
        """
            Set the rainman SQS queue attributes for the alarm topic and the
            first rainman topic, then subscribe that queue to the alarm
            topic, storing the result in alarm.rain_subscription_arn.

            Returns True on success, False (after logging) when either step
            fails.
        """
        topic_arns = [self.sns.alarm_sns_arn, self.alarm.rain_sns_arn[0]]
        attributes_set = ca.set_sqs_queue_attributes(self.alarm.rain_queue_url,
                                                     topic_arns,
                                                     self.alarm.rain_sqs_arn[0])
        if not attributes_set:
            log.info("failed to set attributes to rainman SQS queue")
            return False

        self.alarm.rain_subscription_arn = ca.subscribe_sqs_to_sns_topic(
            self.sns.alarm_sns_arn,
            self.alarm.rain_sqs_arn[0])
        if self.alarm.rain_subscription_arn:
            return True
        log.info("SQS subscription to ALARM SNS topic failed")
        return False

    def delete_cl_created_entities(self):
        """
            Tear down the entities in order: SQS, rule, alarm, then SNS,
            sleeping 10 seconds after each step. The SNS topics are deleted
            only when both the rule and the alarm deletions reported success.
        """
        pause = 10
        log.info(".......delete_cl_created_entities........")

        self.delete_cl_sqs()
        time.sleep(pause)

        rule_gone = self.delete_cl_rule()
        time.sleep(pause)

        alarm_gone = self.delete_cl_alarm()
        time.sleep(pause)

        if rule_gone and alarm_gone:
            self.delete_cl_sns()
        time.sleep(pause)

        log.info("....deletion complete....")

    def delete_cl_sqs(self):
        """
            Unsubscribe and delete the rule SQS queue, then unsubscribe the
            rainman queue from the alarm topic and reset its attributes to
            the rainman topics. Each step runs only if its identifier is set.

            Returns True unless an exception is raised, in which case the
            traceback is logged and False is returned.
        """
        try:
            rule_subscription = self.sqs.rule_subscription_arn
            if rule_subscription:
                log.debug("rule_subscription_arn present")
                unsubscribed = ca.unsubscribe_sqs_to_sns_topic(rule_subscription)
                if not unsubscribed:
                    log.debug("rule_unsubscribe fialed")

            rule_queue_url = self.sqs.queue_url
            if rule_queue_url:
                log.debug("rule sqs url present")
                deleted = ca.delete_sqs(rule_queue_url)
                if not deleted:
                    log.info("rule sqs delete failed")

            rain_subscription = self.alarm.rain_subscription_arn
            if rain_subscription:
                log.debug("alarm.rain_subscription present")
                ca.unsubscribe_sqs_to_sns_topic(rain_subscription)
                ca.set_sqs_queue_attributes(self.alarm.rain_queue_url,
                                            self.alarm.rain_sns_arn,
                                            self.alarm.rain_sqs_arn[0])
        except Exception:
            log.info(traceback.format_exc())
            return False
        return True

    def delete_cl_sns(self):
        """
            Delete the rule and alarm SNS topics, each only when it is set
            and check_sns_targets() reports zero subscriptions for it.
            Failed deletions are logged.
        """
        rule_topic = self.sns.rule_sns_arn
        if rule_topic:
            log.debug("rule sns is there")
            sqs_num = self.check_sns_targets(rule_topic)
            log.info("delete_cl_sns: sns has %d sqs"%int(sqs_num))
            if sqs_num == 0 and not ca.delete_sns(rule_topic):
                log.info("rule_sns delete failed")

        alarm_topic = self.sns.alarm_sns_arn
        if alarm_topic:
            log.debug("alarm sns present")
            sqs_num = self.check_sns_targets(alarm_topic)
            if sqs_num == 0 and not ca.delete_sns(alarm_topic):
                log.info("alarm_sns delete failed")

    def delete_cl_rule(self):
        """
            Delete the rule and its targets, but only when the rule SNS topic
            has zero subscriptions.

            Returns True when the rule itself was deleted, False otherwise.
        """
        log.debug("in delete_cl_rule")
        subscriber_count = self.check_sns_targets(self.sns.rule_sns_arn)
        log.info("delete_cl_rule: sns has %d sqs"%int(subscriber_count))
        if subscriber_count != 0:
            return False

        rule_name = self.rule.rule_name
        if ca.delete_targets_from_rule(rule_name):
            log.debug("delete_targets_from_rule done!")
        if not ca.delete_aws_rule(rule_name):
            return False
        log.debug("delete_aws_rule done!")
        return True

    def delete_cl_alarm(self):
        """
            Delete the CloudAdapter alarm unless its SNS topic still has
            subscriptions.

            Returns True when there is no alarm, when the alarm was deleted,
            or when it was deliberately kept because its topic still has
            subscriptions. Returns False when the alarm has more than one
            action, when the delete call fails, or when an exception is
            raised (the traceback is logged).
        """
        alarm = self.alarm.get_cl_alarm()
        if not alarm:
            log.info("no CL alarm to delete")
            return True
        try:
            log.debug("unsubscribe from alarm")
            sns_arn_list = alarm['AlarmActions']
            if len(sns_arn_list) > 1:
                log.info("alarm %s has %d SNS. unexppected!"%(alarm,len(sns_arn_list)))
                return False

            # An alarm without any action has no topic to inspect; indexing
            # the empty list would raise and leave the alarm undeleted.
            if sns_arn_list and sns_arn_list[0]:
                sqs_num = self.check_sns_targets(sns_arn_list[0])
                log.debug("alarm SNS has %d sqs"%int(sqs_num))
                if sqs_num != 0:
                    log.info("alarm SNS has more sqs. not deleting")
                    return True
                log.debug("alarm SNS has no sqs. delete alarm")

            if not ca.delete_aws_alarms([self.alarm.name]):
                log.info("Failed to delete CloudAdapter Alarm")
                return False
            log.info("alarm deleted")
            return True
        except Exception:
            log.info(traceback.format_exc())
            return False

    def check_sns_targets(self, sns_arn):
        """
            Return the number of subscriptions on the given SNS topic,
            logging each subscription endpoint.

            Returns 2 when the subscriptions cannot be fetched or an
            exception is raised, so that callers testing for 0 do not delete
            the topic.
        """
        keep_sns = 2 #so that sns is not deleted
        try:
            response = ca.get_subscriptions(sns_arn)
            if not response:
                log.info("check_cl_sns_targets :could not fetch subscriptions")
                return keep_sns
            subscriptions = response['Subscriptions']
            num = len(subscriptions)
            log.info("check_cl_sns_targets : %d num of targets found for %s", num,sns_arn)
            for subs in subscriptions:
                log.info("subscription is to %s"% subs['Endpoint'])
            return num
        except Exception:
            log.info(traceback.format_exc())
            return keep_sns

    #NSLB-8957 : to avoid balancing
    def get_least_populated_az_instance(self, faulty_instance, zone_list):
        """
            Pick a sample instance from the zone holding the fewest instances.

            zone_list is a list of dicts with 'AvailabilityZone' and
            'InstanceId' (list of instance ids) entries. When the faulty
            instance's zone ties for the minimum it is preferred over the
            zone found first.

            Returns the first instance id of the selected zone, or None when
            zone_list is empty.
            NOTE(review): assumes every zone in zone_list has at least one
            instance id -- confirm against the caller.
        """
        if not zone_list:
            log.info("no zones available to select an instance from")
            return None

        # Scan every zone: stopping at the first zone smaller than zone 0
        # would miss a later, smaller one. min() keeps the earliest zone
        # among ties.
        least = min(zone_list, key=lambda zone: len(zone['InstanceId']))
        min_instances = len(least['InstanceId'])
        selectedZone = least['AvailabilityZone']
        selectedInstance = least['InstanceId'][0]
        log.debug("min_instances: %d selectedZone[%s] selectedInstance[%s]"%(min_instances,selectedZone,selectedInstance))

        if selectedZone != faulty_instance.az:
            log.debug("selected az is not faulty instance AZ.")
            for zone in zone_list:
                if zone['AvailabilityZone'] != faulty_instance.az:
                    continue
                numInstances = len(zone['InstanceId'])
                if numInstances == min_instances:
                    log.debug("faulty az also has min instances, so selecting it")
                    selectedZone = zone['AvailabilityZone']
                    selectedInstance = zone['InstanceId'][0]
                else:
                    log.debug("faulty az has %d instances, so cannot select it"%(numInstances))
                # Only the first entry for the faulty zone is considered.
                break
        else:
            log.info("selected az is faulty az")
        log.info("FINAL SELECTION: min_instances: %d selectedZone[%s] selectedInstance[%s]"%(min_instances,selectedZone,selectedInstance))
        return selectedInstance

    def unbind_server_from_servicegroup(self, faulty_instance):
        """
            Remove the faulty instance's server from its service group.

            Returns True after issuing the removal, or False without doing
            anything when the instance lacks a name, port or service group.
        """
        server = rain.server()
        group = rain.group()
        has_binding = (faulty_instance.name
                       and faulty_instance.port
                       and faulty_instance.servicegroup)
        if not has_binding:
            return False

        server.name = faulty_instance.name
        group.port = faulty_instance.port
        group.name = faulty_instance.servicegroup
        local.remove_server_from_group(server, group)
        return True

    def bind_server_to_servicegroup(self, InstanceId, faulty_instance):
        """
            Add the instance, identified by its private IP, to the faulty
            instance's service group.

            Returns the private IP that was bound. When the instance has no
            private IP it is terminated and None is returned.
        """
        group = rain.group()
        server = rain.server()
        server.ip = ca.get_instance_details(InstanceId, 'PrivateIpAddress')
        if not server.ip:
            log.info("server has no IP address. Cannot bind to SG. Deleting server")
            terminated = ca.terminate_ec2_instances([InstanceId])
            if not terminated:
                log.info("failed to terminate instance")
            return None

        log.info("bind_server_to_servicegroup: sevrerIP=%s"%server.ip)
        have_group_details = faulty_instance.port and faulty_instance.servicegroup
        if have_group_details:
            group.port = faulty_instance.port
            group.name = faulty_instance.servicegroup
            server.name = server.ip
        # The add is issued even when the group details above were not set.
        local.add_server_to_group(server, group)
        return server.ip

    def drain_instance(self, faulty_instance):
        """
            Ask the local service to drain the faulty instance's server in
            its service group, using default_drain_time as the drain time.

            Returns True after issuing the drain, or False without doing
            anything when the service group, name or port is missing.
        """
        server = rain.server()
        group = rain.group()
        drainable = (faulty_instance.servicegroup
                     and faulty_instance.name
                     and faulty_instance.port)
        if not drainable:
            return False

        group.name = faulty_instance.servicegroup
        server.name = faulty_instance.name
        group.port = faulty_instance.port
        group.drain_time = default_drain_time
        local.drain_server_in_group(server, group)
        return True


    def remove_ip_type_instance(self, faulty_instance, restore_to_n):
        """
            Drain the faulty instance, wait default_drain_time seconds, then
            terminate it and wait for the cloud to report the change.

            Returns True when the instance was removed. If removal fails and
            restore_to_n is set, the n+1 instance held in
            cl_created_new_instance is drained, unbound, terminated and
            cleaned up before returning False. Any exception is logged with
            its traceback and results in False.
        """
        try:
            # Report the drain time actually in effect; it is configurable.
            log.info("draining faulty instance %s for %ssec"%(faulty_instance.instanceID, str(default_drain_time)))
            if not self.drain_instance(faulty_instance):
                log.info("failed to drain faulty IP server. But continuing")
            time.sleep(default_drain_time)

            instance_id = faulty_instance.instanceID
            removed = (ca.terminate_ec2_instances([instance_id])
                       and self.wait_for_status_change_in_cloud(None, instance_id, 40, None))
            if removed:
                return True

            log.info("remove_ip_type_instance: failed to remove instance, revert to N=%s"%str(restore_to_n))
            if not restore_to_n:
                return False

            log.info("draining n+1 instance for %ssec"%str(default_drain_time))
            if not self.drain_instance(self.cl_created_new_instance):
                log.info("failed to drain n+1 instance, but continuing")
            log.info("unbind n+1 server from SG")
            if not self.unbind_server_from_servicegroup(self.cl_created_new_instance):
                log.info("failed to unbind server from SG, but continuing")
            log.info("remove instance from cloud")
            if not ca.terminate_ec2_instances([self.cl_created_new_instance.instanceID]):
                log.info("failed to remove n+1 server from cloud")
            self.cl_created_new_instance.cleanup()
            return False
        except Exception:
            log.info(traceback.format_exc())
            return False

    def enable_server_in_servicegroup(self, faulty_server):
        """
            Enable the faulty server in its service group through the local
            service.

            Returns True when the enable call succeeds, False (after logging)
            otherwise.
        """
        server = rain.server()
        group = rain.group()
        details_known = (faulty_server.name
                         and faulty_server.servicegroup
                         and faulty_server.port)
        if details_known:
            server.name = faulty_server.name
            group.name = faulty_server.servicegroup
            group.port = faulty_server.port

        # The enable call is made even when the details above were not set.
        enabled = local.enable_server_in_group(server, group)
        if enabled:
            return True
        log.info("failed to enable faulty server in SG")
        return False

    def get_ip_instance_LaunchTemplateData(self, faulty_instance):
        """
            Fetch the launch template data of the faulty instance and strip
            the Citrix tags from it.

            Returns the template data, or None when the instance has no
            instance id, the template data cannot be fetched, or the tags
            cannot be removed.
        """
        # Without an instance id there is nothing to look up; return early
        # rather than reach the code below with no template data assigned.
        if not faulty_instance.instanceID:
            return None

        LaunchTemplateData = ca.get_template_data(faulty_instance.instanceID)
        if not LaunchTemplateData:
            log.info("get_ip_instance_LaunchTemplateData: failed to get faulty instance template data")
            return None
        if not self.remove_citrix_tags(LaunchTemplateData):
            return None
        return LaunchTemplateData

    def get_asg_instance_details(self, faulty_instance):
        """
            Gather what is needed to add one instance to the faulty
            instance's ASG.

            Raises the max capacity by one when it equals the desired
            capacity. If some AZ has no instances yet, the desired capacity
            is raised instead so the cloud picks the zone, and
            {"Complete": True} is returned. Otherwise returns a dict with
            the sample instance, its launch template data, the old
            instances and the desired capacity.

            Returns None on any failure.
        """
        asg_info = self.get_old_intances_info(faulty_instance)
        old_instances, desired_capacity, max_capacity, az_map, all_az_list = asg_info
        if None in (old_instances, desired_capacity, max_capacity, az_map, all_az_list):
            return None

        if desired_capacity == max_capacity:
            # Remember the original value so it can be restored later.
            self.original_max_capacity = max_capacity
            max_capacity = max_capacity + 1
            log.info("max and desired capacity are same, hence incrementing max capacity to allow N+1 operation of desired capacity")
            raised = self.update_capacity(faulty_instance, 0, max_capacity, old_instances)
            if not raised:
                log.info("failed to update max capacity")
                return None

        if len(az_map) != len(all_az_list):
            log.debug("there are AZs that still dont have any instances, so let cloud pick")
            grown = self.update_capacity(faulty_instance, desired_capacity+1, 0, old_instances)
            if grown:
                log.info("new instance successfully created by cloud")
                return {"Complete":True}
            log.info("failed to updated desired cap")
            if self.original_max_capacity:
                reverted = self.update_capacity(faulty_instance, 0, self.original_max_capacity, old_instances)
                if not reverted:
                    log.info("failed to revert max capacity changes!")
            return None

        sample_instance = self.get_least_populated_az_instance(faulty_instance, az_map)
        if not sample_instance:
            log.info("no sample instance found to add new instance. quitting")
            return None
        log.debug("sample instance : %s"%sample_instance)

        LaunchTemplateData = ca.get_template_data(sample_instance)
        if not LaunchTemplateData:
            log.info("get_asg_instance_details : failed to get template data")
            return None

        log.debug("sample_instance %s and faulty %s"%(sample_instance ,faulty_instance.instanceID))
        # Citrix tags are stripped only when the sample is the faulty instance.
        if sample_instance == faulty_instance.instanceID:
            if not self.remove_citrix_tags(LaunchTemplateData):
                return None

        details = {}
        details["sample_instance"] = sample_instance
        details["LaunchTemplateData"] = LaunchTemplateData
        details["old_instances"] = old_instances
        details["desired_capacity"] = desired_capacity
        return details

    def run_instance_from_LaunchTemplateData(self, name, LaunchTemplateData, removeIP):
        """
            Create a launch template called name from LaunchTemplateData
            (replacing an existing template of that name) and start an
            instance from it, retrying up to 10 times while the create call
            answers "InvalidIPAddressInUse".

            Returns the new instance id once the instance is running, or
            None on any failure.
        """
        if ca.get_templates([name]):
            if not ca.delete_template(name):
                log.info("Deleting Launch Template also failed")
                return None

        self.prepare_template(LaunchTemplateData,removeIP)
        LaunchTemplateId = ca.create_launch_temp(name, LaunchTemplateData)
        if not LaunchTemplateId:
            log.info("failed to create Launch Template")
            return None
        log.debug("launchtempID %s"%LaunchTemplateId )

        retry_marker = "InvalidIPAddressInUse"
        for _attempt in range(10):
            time.sleep(poll_time)
            InstanceId = ca.create_instance(name)
            if not InstanceId:
                log.info("create_instance failed. No retry since error is not InvalidIPAddressInUse")
                return None
            log.debug("InstanceId: %s"%InstanceId )
            if InstanceId != retry_marker:
                break
        else:
            # Every attempt came back with the retry marker.
            log.info("retry failed to bring up instance")
            return None

        if self.wait_for_instance_to_come_up(InstanceId):
            return InstanceId
        log.info("new instance failed to come up")
        return None

    def attach_instance_to_asg(self, InstanceId, faulty_instance, old_instances, desired_capacity):
        """
            Attach the instance to the faulty instance's ASG and check it
            with check_newly_added_instance() against desired_capacity + 1.

            Returns True on success. If attaching fails the instance is
            terminated; if the check fails it is detached again. Both cases,
            and any exception, return False.
        """
        try:
            log.info("Attaching Instance %s to ASG"%(InstanceId))
            attached = ca.attach_instance_to_asg(InstanceId, faulty_instance.asg_group)
            if not attached:
                log.info("failed to attach new instance to asg")
                terminated = ca.terminate_ec2_instances([InstanceId])
                if not terminated:
                    log.info("failed to terminate new instance")
                return False

            expected_capacity = desired_capacity+1
            verified = self.check_newly_added_instance(faulty_instance,
                                                       expected_capacity,
                                                       old_instances)
            if verified:
                return True

            log.info("new instance in ASG failed")
            detached = ca.detach_instance_from_asg(InstanceId, faulty_instance.asg_group)
            if not detached:
                log.info("detach_instance_from_asg failed")
            return False
        except Exception:
            log.info(traceback.format_exc())
            return False


    def remove_citrix_tags(self, LaunchTemplateData):
        """
            Strip the tags whose key is the cloud_message_parser tagAzKey,
            tagStatusKey or tagListenKey from every tag specification of
            LaunchTemplateData (modified in place).

            A specification left with no tags is dropped; 'TagSpecifications'
            itself is removed only when no specification remains.
            Specifications without a 'Tags' entry are kept untouched.

            Returns LaunchTemplateData, or None when an exception is raised
            (for example when 'TagSpecifications' is absent); the traceback
            is logged.
        """
        try:
            TagSpecifications = LaunchTemplateData['TagSpecifications']
            # Build the parser once instead of once per comparison.
            parser = cloud_message_parser()
            citrix_tags = (
                (parser.tagAzKey, "got az tag"),
                (parser.tagStatusKey, "got status tag "),
                (parser.tagListenKey, "got listenDetails tag"),
            )

            remaining_specs = []
            for spec in TagSpecifications:
                if 'Tags' not in spec:
                    remaining_specs.append(spec)
                    continue
                new_tags = []
                for tag in spec['Tags']:
                    for citrix_key, message in citrix_tags:
                        if tag['Key'] == citrix_key:
                            log.debug(message)
                            break
                    else:
                        new_tags.append(tag)
                if new_tags:
                    spec['Tags'] = new_tags
                    remaining_specs.append(spec)

            # Decide after the loop: deleting the key while iterating would
            # discard other specifications and fail on a second empty one.
            if remaining_specs:
                LaunchTemplateData['TagSpecifications'] = remaining_specs
            else:
                del LaunchTemplateData['TagSpecifications']

            log.debug(LaunchTemplateData)
            return LaunchTemplateData
        except Exception:
            log.info(traceback.format_exc())
            return None

    def wait_for_instance_to_come_up(self, InstanceId):
        """
            Poll the instance status up to 40 times, sleeping
            iteration_interval seconds between polls, until the instance
            state is 'running'.

            Returns True once the instance is running, False when the polls
            are exhausted or an exception is raised (traceback is logged).
        """
        try:
            max_iterations = 40
            while max_iterations:
                max_iterations -= 1
                response = ca.get_instance_status(InstanceId)
                # Treat an empty response like an empty status list, so the
                # retry also sleeps and never indexes a missing response.
                if not response or len(response["InstanceStatuses"]) == 0:
                    log.debug("Instance is NOT yet created - Sleep & LOOP")
                    time.sleep(iteration_interval)
                    continue

                status = response["InstanceStatuses"][0]
                log.debug(status)
                log.debug("Instance State")
                log.debug(status["InstanceState"]["Name"])
                if status["InstanceState"]["Name"] == 'running':
                    log.info("New Instance is in RUNNING STATE -- DONE")
                    return True
                log.debug("New Instance is NOT in RUNNING STATE - SLEEP & LOOP")
                time.sleep(iteration_interval)
            return False
        except Exception:
            log.info(traceback.format_exc())
            return False

    def prepare_template(self, LaunchTemplateData, removeIP):
        """
            Adjust LaunchTemplateData in place so it can be used to launch a
            new instance.

            CpuOptions is removed for the t2.micro and mac1.metal instance
            types. When removeIP is set, each network interface has the
            fixed address removed from its primary entry, its secondary
            entries dropped, and SecondaryPrivateIpAddressCount set to the
            number of entries dropped from that interface.

            Returns LaunchTemplateData, or None when an exception is raised
            (the traceback is logged).
        """
        ##ClientError: An error occurred (UnsupportedOperation) when calling the RunInstances operation:
        ##The t2.micro instance type does not support specifying CpuOptions.
        try:
            if LaunchTemplateData["InstanceType"] in ("t2.micro", "mac1.metal"):
                LaunchTemplateData.pop("CpuOptions", None)
            if not removeIP:
                return LaunchTemplateData

            ## Remove the IPs associated with the Interfaces
            for interface in LaunchTemplateData["NetworkInterfaces"]:
                addresses = interface["PrivateIpAddresses"]
                kept = []
                for entry in addresses:
                    log.debug(entry.pop("PrivateIpAddress", None))
                    # Explicit comparison kept from the original: only a
                    # False (or 0) "Primary" marks a secondary address.
                    if entry["Primary"] == False:
                        continue
                    kept.append(entry)
                secondary_count = len(addresses) - len(kept)
                # Rebuild the list after the scan; removing entries while
                # indexing over the original length runs off the end.
                addresses[:] = kept
                # The count belongs to the interface it was taken from.
                if secondary_count > 0:
                    interface["SecondaryPrivateIpAddressCount"] = secondary_count

            return LaunchTemplateData
        except Exception:
            log.info(traceback.format_exc())
            return None

    def tag_instance(self, tagKey, tagValue, instance):
        """Put the tag ``tagKey`` = ``tagValue`` on one instance.

        Returns True when ``ca.put_tag_on_instances`` reports success,
        False otherwise.
        """
        if not ca.put_tag_on_instances([instance], tagKey, tagValue):
            return False
        log.debug("tagged %s successfully with tagValue=%s", instance, tagValue)
        return True


    #always returns False so that queue is cleared
    def slave_cco_task_handler(self, faulty_instance):
        """Replace a faulty IP-type server on this node using an n+1 instance.

        Called from wait_for_faulty_instance for "SLAVE_CCO_IP" messages.
        Steps, in order:

        1. Poll ``ca.get_instances_by_tag(self.n_plus_1_tag, <faulty id>)``
           until it yields an instance (60 polls at most).
        2. Bind that instance to the faulty server's servicegroup and wait for
           it to be UP.
        3. Drain the faulty server and sleep ``default_drain_time``.
        4. Poll until ``self.enable_server_tag`` is present on the n+1 instance
           (60 polls at most).
        5. Remove the ARP entry, re-enable the faulty server and wait for UP.
        6. Drain and unbind the n+1 instance.

        Every failure path undoes what it can, deletes the SQS message and
        cleans up the temporary ``faulty_server`` object.

        Returns:
            Always False, so that the caller treats the queue entry as done.
        """
        poll_limit = 60
        try:
            # for/else: the else branch runs only when no poll succeeded, so a
            # hit on the very last poll is no longer mistaken for a timeout.
            for _ in range(poll_limit):
                time.sleep(iteration_interval)
                InstanceId = ca.get_instances_by_tag(self.n_plus_1_tag, faulty_instance.instanceID)
                log.info(type(InstanceId))
                if InstanceId:
                    break
                log.debug("waiting for n+1 instance")
            else:
                self.delete_message_from_queue()
                log.info("did not receive  the n+1 instance")
                return False

            log.info("received the n+1 instance on slave : %s" % InstanceId)
            n_plus_one_inst = faulty_server()
            n_plus_one_inst.instanceID = InstanceId
            n_plus_one_inst.servicegroup = faulty_instance.servicegroup
            n_plus_one_inst.port = faulty_instance.port

            n_plus_one_inst.name = self.bind_server_to_servicegroup(InstanceId, faulty_instance)
            if not n_plus_one_inst.name:
                log.info("failed to bind n+1 server to service group. retry")
                self.delete_message_from_queue()
                n_plus_one_inst.cleanup()
                return False

            log.info("binding successfull. Now wait till it comes UP")
            if self.wait_for_status_change_in_adc(n_plus_one_inst.servicegroup, 40, n_plus_one_inst.name, "UP") == "UP":
                log.info("bound server in SG is UP")
            else:
                log.info("bound server in SG failed to come UP. Hence remove from slave ADC")
                if not self.unbind_server_from_servicegroup(n_plus_one_inst):
                    log.info("failed to unbind and remove server also! ")
                n_plus_one_inst.cleanup()
                self.delete_message_from_queue()
                return False

            log.info("draining faulty instance %s for 120sec in slave" % faulty_instance.instanceID)
            if not self.drain_instance(faulty_instance):
                log.info("failed to drain faulty IP server. But continuing")
            time.sleep(default_drain_time)

            log.info("wait till new instance with faultyIP is ready")
            for _ in range(poll_limit):
                time.sleep(iteration_interval)
                if ca.get_tag_by_instance(n_plus_one_inst.instanceID, self.enable_server_tag):
                    break
            else:
                log.info("new instance with faultyIP not received. revert to orginal config")
                # Was a bare unbind_server_from_servicegroup(...) call, which
                # raised NameError and skipped the rest of this rollback.
                if not self.unbind_server_from_servicegroup(n_plus_one_inst):
                    log.info("failed to unbind n+1 server also!")
                if not self.enable_server_in_servicegroup(faulty_instance):
                    log.info("failed to enable back faulty_instance")
                self.delete_message_from_queue()
                n_plus_one_inst.cleanup()
                return False

            log.info("if arp entry exists, del it")
            if not local.remove_arp_entry(faulty_instance.name):
                log.info("failed to delete ARP entry. Faulty Server will take a long time to become UP")

            log.info("enable faulty server")
            if not self.enable_server_in_servicegroup(faulty_instance):
                log.info("failed to enable faultyserver. revert to orginal config")
                # Same missing-self fix as in the rollback above.
                if not self.unbind_server_from_servicegroup(n_plus_one_inst):
                    log.info("failed to unbind n+1 instance too")
                self.delete_message_from_queue()
                n_plus_one_inst.cleanup()
                return False

            log.info("wait till faulty server  comes up")
            if self.wait_for_status_change_in_adc(faulty_instance.servicegroup, 40, faulty_instance.name, 'UP') != 'UP':
                log.info("New server created with same IP as faulty server, failed to come UP in servicegroup")
                if not self.unbind_server_from_servicegroup(n_plus_one_inst):
                    log.info("failed to unbind n+1 server")
                self.delete_message_from_queue()
                n_plus_one_inst.cleanup()
                return False

            log.info("finally, draining n+1 instance and removing it")
            if not self.drain_instance(n_plus_one_inst):
                log.info("draining n+1 instance failed. but continue")
            time.sleep(default_drain_time)
            if not self.unbind_server_from_servicegroup(n_plus_one_inst):
                log.info("failed to unbind N+1 server from SG")

            n_plus_one_inst.cleanup()
            self.delete_message_from_queue()
            return False
        except Exception:
            log.info(traceback.format_exc())
            self.delete_message_from_queue()
            return False

    def form_client_token(self, AutoScalingGroupName, AsgInstanceId):
        """Return the client token, which is ``AsgInstanceId`` unchanged.

        ``AutoScalingGroupName`` is accepted but not used.  The token is
        logged at INFO level.  Returns None if anything raises.
        """
        log.debug("Initialize Client Token")
        try:
            token = AsgInstanceId
            log.info(token)
        except Exception:
            log.info(traceback.format_exc())
            return None
        return token

    def validate_listenDetails(self, faulty_instance):
        """Check that an IP-type faulty server is known to both cloud and ADC.

        The checks run in this order and the first failure returns False:
        required attributes (instanceID, port, name, servicegroup) are set,
        ``ca.get_instance_status`` reports at least one InstanceStatuses entry,
        the servicegroup exists locally, and the server is found in that
        servicegroup.  Returns True when all checks pass.
        """
        required_attrs = ("instanceID", "port", "name", "servicegroup")
        if not faulty_instance or not all(getattr(faulty_instance, attr) for attr in required_attrs):
            log.info("validate_listenDetails: missing params")
            log.debug(faulty_instance)
            return False

        cloud_status = ca.get_instance_status(faulty_instance.instanceID)
        log.debug("checking if IP instance is present in cloud: %s"%cloud_status)
        present_in_cloud = cloud_status and 'InstanceStatuses' in cloud_status and cloud_status['InstanceStatuses']
        if not present_in_cloud:
            log.info("faulty IP type instance %s is not present in cloud"%faulty_instance.name)
            return False

        log.debug("checking if servicegroup exists on ADC")
        if not local.get_group(faulty_instance.servicegroup):
            log.info("servicegroup of faulty server not present in ADC")
            return False

        log.debug("checking if server exists in SG")
        server_state = local.get_status_of_instance_in_asg(faulty_instance.name, faulty_instance.servicegroup)
        if server_state == 'server_not_found':
            log.info("servicegroup has no such server in ADC")
            return False
        return True

    def wait_for_faulty_instance(self, faulty_instance):
        """Poll SQS once and fill ``faulty_instance`` from the message found.

        Only acts when ``faulty_instance.instanceID`` is None; otherwise it
        returns None without polling.

        A usable message yields one of three instance types:
        * "SLAVE_CCO_IP": listen details present, message is for another AZ;
          the work is handed to slave_cco_task_handler and its result returned.
        * "OWNER_CCO_IP": listen details present, message is for this AZ.
        * "ASG": no listen details; the ASG name is parsed from the message
          and the alarm is set up (retried every ``poll_time`` seconds until
          ``self.alarm.setup`` succeeds).

        Returns:
            True when ``faulty_instance`` is populated and should be processed;
            False when the message is unusable (it is deleted from the queue in
            most of those paths); None when instanceID was already set.
        """
        if faulty_instance.instanceID is not None:
            return None

        handled_by_slave = False
        message = self.sqs.poll()
        log.debug("wait_for_faulty_instance msg :"+str(message))
        if not (message and 'Messages' in message):
            log.debug("Messages not present in notification received")
            self.delete_message_from_queue()
            return False

        try:
            log.debug("save ReceiptHandle")
            self.receiptHandle = cloud_message_parser().do_parse(cloud_message_parser().type().e_RECEIPT_HANDLE, message)
            if self.receiptHandle is None:
                log.debug("No reciptHandle")
                return False

            parsed_tags = cloud_message_parser().do_parse(cloud_message_parser.type().e_TAGS, message)
            faulty_instance.instanceID, availability_zone, listenDetails = parsed_tags
            if faulty_instance.instanceID is None or availability_zone is None:
                self.delete_message_from_queue()
                return False

            if not self.is_my_az(availability_zone):
                log.info("Not my Availability Zone!")
                if not listenDetails:
                    self.delete_message_from_queue()
                    return False
                handled_by_slave = True

            if listenDetails:
                # listenDetails is "<servicegroup>:<ip>:<port>"; the server
                # name in ADC will be the IP address.
                fields = listenDetails.split(':')
                faulty_instance.servicegroup, faulty_instance.name, faulty_instance.port = fields[0], fields[1], fields[2]
                if not faulty_instance.servicegroup or not faulty_instance.name or not faulty_instance.port:
                    log.info("faulty instance has no IP or Port. OR not bound to any servicegroup")
                    self.delete_message_from_queue()
                    return False
                log.debug("listenDetails: Serviecgroup=%s IP=%s Port=%s"%(faulty_instance.servicegroup, faulty_instance.name,str(faulty_instance.port)))
                if not self.validate_listenDetails(faulty_instance):
                    self.delete_message_from_queue()
                    return False
                if handled_by_slave:
                    faulty_instance.type = "SLAVE_CCO_IP"
                    return self.slave_cco_task_handler(faulty_instance)
                faulty_instance.type = "OWNER_CCO_IP"
                return True

            # No listen details: the faulty server belongs to an ASG.
            faulty_instance.asg_group = cloud_message_parser().do_parse(cloud_message_parser.type().e_ASG_NAME, message)
            if not faulty_instance.asg_group:
                self.delete_message_from_queue()
                return False
            faulty_instance.type = "ASG"
            faulty_instance.name = faulty_instance.instanceID

            if local.get_status_of_instance_in_asg(faulty_instance.instanceID, faulty_instance.asg_group) == 'server_not_found':
                self.delete_message_from_queue()
                log.info("ASG %s or fauty serever %s not found "%(faulty_instance.asg_group,faulty_instance.instanceID))
                return False
            log.debug("faulty server present in "+faulty_instance.asg_group+", proceed to remove it")
            log.info("check if alarm is created. If not, create it")
            # Keep retrying until the alarm exists; there is no retry limit.
            while not self.alarm.setup(self.sns.alarm_sns_arn):
                log.info("Failed to create alarm to handle ASG faulty server msg. try again")
                time.sleep(poll_time)
            if not self.make_alarm_configs():
                return False
            return True
        except Exception:
            self.delete_message_from_queue()
            log.info(traceback.format_exc())
            return False

    def get_old_intances_info(self, faulty_instance):
        """Describe the faulty instance's ASG and record the instance's AZ.

        Parses the ``ca.describe_asg`` response into the instance list,
        desired capacity, max capacity, AZ-to-instance map and AZ list, then
        looks the faulty instance up in the instance details and stores its
        AvailabilityZone in ``faulty_instance.az``.

        Returns:
            (old_instances_list, desired_capacity, max_capacity,
            az_instance_map, all_az_list) when the faulty instance is found;
            a tuple of five None values when it is not found or on error.
        """
        response = ca.describe_asg(faulty_instance.asg_group)

        def parse(kind):
            return cloud_message_parser().do_parse(kind, response)

        old_instances_list = parse(cloud_message_parser.type().e_INSTANCE_LIST)
        desired_capacity = parse(cloud_message_parser.type().e_DESIRED_CAPACITY)
        max_capacity = parse(cloud_message_parser().type().e_MAX_CAPACITY)
        az_instance_map = parse(cloud_message_parser.type().e_AZ_INSTANCE_MAP)
        all_az_list = parse(cloud_message_parser.type().e_AZ_LIST)
        all_instances = parse(cloud_message_parser.type().e_INSTANCE_DETAILS)

        try:
            wanted_id = str(faulty_instance.instanceID)
            for details in all_instances:
                if str(details['InstanceId']) != wanted_id:
                    continue
                faulty_instance.az = details['AvailabilityZone']
                log.debug("faulty instance belongs to %s"%(faulty_instance.az))
                return old_instances_list, desired_capacity, max_capacity, az_instance_map, all_az_list
            log.info("cloud not find az of faulty instance")
            return None, None, None, None, None
        except Exception:
            log.info(traceback.format_exc())
            return None, None, None, None, None

    def delete_message_from_queue(self):
        """Delete the SQS message identified by ``self.receiptHandle``.

        Returns:
            True when the message was dequeued (the handle is then reset to
            None); False when there is no handle or the dequeue call failed.
        """
        if self.receiptHandle is None:
            log.debug("no receiptHandle, hence not deleting message")
            return False
        if not ca.dequeue_sqs_queue(self.sqs.queue_url, self.receiptHandle):
            # The handle used to be passed without a %s placeholder, which
            # makes logging report a formatting error instead of the message.
            log.info("Failed to dequeue msg with handle: %s", self.receiptHandle)
            return False
        self.receiptHandle = None
        log.info("dequeue of SQS done")
        return True

    def wait_for_status_change_in_cloud(self, asg_name, instance, max_iterations, status):
        """Poll the cloud until ``instance`` reaches the requested state.

        Args:
            asg_name: ASG to describe; when falsy the instance is checked
                directly with ``ca.get_instance_status`` and the call waits
                for it to disappear, regardless of ``status``.
            instance: instance ID to look for.
            max_iterations: maximum number of polls, one every
                ``iteration_interval`` seconds.
            status: "remove" waits until the instance is no longer in the
                ASG's instance list; "InService" waits until its
                LifecycleState is InService.  Any other value just polls
                until the budget is used up.

        Returns:
            True when the state was reached; False on timeout, on error, or
            (for "InService") when the instance details cannot be parsed.
        """
        try:
            # "> 0" instead of "!= 0": a fractional or negative budget must
            # still terminate rather than count down past zero forever.
            while max_iterations > 0:
                max_iterations = max_iterations - 1
                time.sleep(iteration_interval)

                if not asg_name:
                    response = ca.get_instance_status(instance)
                    log.info(response)
                    if not response or 'InstanceStatuses' not in response or not response['InstanceStatuses']:
                        log.info("%s removed in cloud", instance)
                        return True
                    log.debug("ec2 %s still present in cloud: %s", instance, response)
                    continue

                response = ca.describe_asg(asg_name)

                if status == "remove":
                    instance_list = cloud_message_parser().do_parse(
                                              cloud_message_parser.type().e_INSTANCE_LIST,
                                              response)
                    if not instance_list:
                        # An empty/failed parse is not taken as proof of
                        # removal; try again on the next poll.
                        log.debug("failed to fetch instance_list this time, but lets proceed")
                        continue
                    if instance in instance_list:
                        log.debug("%s still present in cloud", instance)
                        continue
                    log.info("%s removed in cloud", instance)
                    return True

                if status == "InService":
                    instance_details = cloud_message_parser().do_parse(
                                             cloud_message_parser.type().e_INSTANCE_DETAILS,
                                             response)
                    if not instance_details:
                        return False
                    for instance_detail in instance_details:
                        if instance_detail['InstanceId'] != instance:
                            continue
                        log.debug("LifecycleState of %s is %s" % (
                                                instance_detail['InstanceId'],
                                                instance_detail['LifecycleState']))
                        if instance_detail['LifecycleState'] == 'InService':
                            return True

            log.info("wait_for_status_change_in_cloud: status NOT changed")
            return False
        except Exception:
            log.info(traceback.format_exc())
            return False

    def wait_till_cloud_update(self, asg_name, update_param, max_iterations, e_type):
        """Poll the ASG description until a parsed value equals ``update_param``.

        Args:
            asg_name: ASG to describe.
            update_param: value to wait for.  For ``e_INSTANCE_LIST`` this is
                compared with the length of the parsed list; for every other
                type it is compared with the parsed value itself.
            max_iterations: maximum number of polls, one every
                ``iteration_interval`` seconds.
            e_type: ``cloud_message_parser.type`` constant selecting what to
                parse from the response.

        Returns:
            ``(matched, parsed_param)`` where ``parsed_param`` is the last
            parsed value (None if no poll ran).  ``(False, None)`` if an
            exception occurs, e.g. when an instance list fails to parse.
        """
        current_param = None
        # Bound up front so the returns below cannot hit an unbound local
        # when the loop body never runs.
        parsed_param = None
        try:
            # "> 0" rather than "!= 0" so fractional/negative budgets terminate.
            while current_param != update_param and max_iterations > 0:
                max_iterations = max_iterations - 1
                time.sleep(iteration_interval)
                response = ca.describe_asg(asg_name)
                parsed_param = cloud_message_parser().do_parse(e_type, response)
                log.debug("parsed_param :"+str(parsed_param))
                if e_type == cloud_message_parser.type().e_INSTANCE_LIST:
                    current_param = len(parsed_param)
                else:
                    current_param = parsed_param
        except Exception:
            log.info(traceback.format_exc())
            return False, None

        if current_param == update_param:
            log.info("parameter has been updated in ASG ")
            return True, parsed_param
        log.info("Parameter has NOT been updated in ASG")
        return False, parsed_param

    def is_scaling_in_progress(self, asg_name):
        """Report whether a scale-down or scale-up is under way for the ASG.

        Collects the metric alarms of every ARN in ``self.alarm.rain_sns_arn``,
        checks scale-down first and scale-up second.  When either is in
        progress the previously disabled alarms are re-enabled (with retries
        if the first attempt fails) and True is returned; otherwise False.
        """
        collected_alarms = []
        for sns_arn in self.alarm.rain_sns_arn:
            collected_alarms = collected_alarms + self.alarm.get_actionPrefix_metric_alarm(sns_arn)
        log.debug("fetched %s alarms for %s rainman SNS",len(collected_alarms),len(self.alarm.rain_sns_arn))

        if self.is_scale_down_inprogress(asg_name, collected_alarms):
            log.info("scaleDOWN in progress, go to sleep")
        elif self.is_scale_up_inprogress(asg_name):
            log.info("scaleUP is in progress, so re-enable alarms")
        else:
            return False

        # Both "in progress" outcomes restore the alarms the same way.
        if not self.enable_disabled_alarms():
            self.retry_changing_alarm_state()
        return True

    def enable_disabled_alarms(self):
        """Re-enable every alarm listed in ``self.alarm.disabled_alarms``.

        Returns True after all ``ca.enable_alarm`` calls were issued, False
        (with the traceback logged) if any step raises.  The list itself is
        not modified here.
        """
        try:
            for disabled in self.alarm.disabled_alarms:
                ca.enable_alarm(disabled['AlarmName'])
        except Exception:
            log.info(traceback.format_exc())
            return False
        return True

    def retry_changing_alarm_state(self,asg_name=None):
        """Retry re-enabling alarms up to five times.

        Each attempt sleeps ``iteration_interval`` seconds first.  With
        ``asg_name`` the ASG's policy alarms are enabled through
        change_policy_alarm_status; without it the alarms in
        ``self.alarm.disabled_alarms`` are enabled.

        Returns True as soon as one attempt succeeds, False when all
        attempts fail or an exception is raised.
        """
        attempts = 5
        try:
            for _ in range(attempts):
                time.sleep(iteration_interval)
                if asg_name:
                    succeeded = self.change_policy_alarm_status(asg_name, True)
                else:
                    succeeded = self.enable_disabled_alarms()
                if succeeded:
                    return True
        except Exception:
            log.info(traceback.format_exc())
            return False
        return False

    def is_scale_up_inprogress(self, asg_name):
        """Disable the ASG's policy alarms and check for an active scaling activity.

        Returns True (scale-up assumed) when the policy alarms cannot be
        disabled, when no scaling-activity response is available, or when the
        parsed status is "InProgress", "PreInService" or None; in the last
        case the policy alarms are re-enabled first.  Returns False when no
        activity is in progress, leaving the policy alarms disabled.
        """
        # NOTE(review): the two early returns below leave the policy alarms in
        # whatever state change_policy_alarm_status left them - confirm intent.
        if not self.change_policy_alarm_status(asg_name, False):
            return True

        activities = ca.get_status_of_scaling_activities(asg_name)
        if not activities:
            return True

        status = cloud_message_parser().do_parse(cloud_message_parser().type().e_SCALING_STATUS, activities)
        if status in ("InProgress", "PreInService", None):
            log.info("status of last ScalingActivity is IN_Progress ")
            if not self.change_policy_alarm_status(asg_name, True):
                self.retry_changing_alarm_state(asg_name)
            return True

        log.info("NO scalingActivity is in progress "+status)
        return False

    def is_scale_down_inprogress(self, asg_name, sns_alarms):
        """Report whether a scale-down appears to be in progress.

        True when check_rainman_alarms(sns_alarms) is true, or, failing that,
        when check_trofs_state_of_servers(asg_name) is true.

        Returns:
            True or False.
        """
        if self.check_rainman_alarms(sns_alarms):
            return True
        log.info("checking server states")
        if self.check_trofs_state_of_servers(asg_name):
            return True
        # Explicit False instead of falling off the end and returning None.
        return False

    def check_trofs_state_of_servers(self, asg_name):
        """Check whether any server of the group is going or gone out of service.

        Returns True when a server's state is 'GOING OUT OF SERVICE',
        'OUT OF SERVICE' or 'DOWN WHEN GOING OUT OF SERVICE', and also when
        any step raises.  Returns False when no server is in such a state.
        """
        out_of_service_states = (
            'GOING OUT OF SERVICE',
            'OUT OF SERVICE',
            'DOWN WHEN GOING OUT OF SERVICE',
        )
        try:
            # local.get_servers_in_group is given an object with a .name
            # attribute, so wrap the ASG name in a minimal holder.
            class Group():
                def __init__(self, asg):
                    self.name = asg

            for member in local.get_servers_in_group(Group(asg_name)):
                state = local.get_status_of_instance_in_asg(member.name, asg_name)
                log.debug("state of server "+member.name+" in asg "+asg_name+" is "+state)
                if state in out_of_service_states:
                    log.debug("server "+member.name+" is in TROFS")
                    return True
        except Exception:
            log.info(traceback.format_exc())
            return True
        return False

    def check_rainman_alarms(self, sns_alarms):
        """Inspect alarms whose description contains "RainmanScaleDownAlarm".

        Returns True as soon as one such alarm has StateValue 'ALARM'.  Every
        other matching alarm is disabled through ``ca.disable_alarm`` and
        appended to ``self.alarm.disabled_alarms``.  After the scan, True is
        also returned when ``self.alarm.disabled_alarms`` is empty; otherwise
        False.
        """
        for candidate in sns_alarms:
            log.debug("check if %s alarm notfities Rainman SNS" % candidate['AlarmName'])
            if 'AlarmDescription' not in candidate:
                continue
            if "RainmanScaleDownAlarm" not in candidate['AlarmDescription']:
                continue

            log.debug("validation succes. Now check Status")
            if candidate['StateValue'] == 'ALARM':
                log.debug("Status : IN_ALARM")
                return True

            ca.disable_alarm(candidate['AlarmName'])
            log.info("%s is not active. Hence disabled it" % candidate['AlarmName'])
            self.alarm.disabled_alarms.append(candidate)

        if len(self.alarm.disabled_alarms) > 0:
            return False
        log.info("No Alarms notify to Rainman's SNS")
        return True

    def update_capacity(self, faulty_instance,desired_capacity, max_capacity, old_instances):
        """Update the ASG's max capacity or, failing that, its desired capacity.

        When ``max_capacity`` is truthy only the max capacity is updated and
        confirmed; ``desired_capacity`` is ignored in that call.  Otherwise,
        when ``desired_capacity`` is truthy, the desired capacity is updated,
        confirmed, and the newly added instance is awaited through
        check_newly_added_instance.

        Returns:
            True on success, False on any failure, None when both capacities
            are falsy.
        """
        poll_budget = 20

        if max_capacity:
            if not ca.update_asg_max_capacity(faulty_instance.asg_group, max_capacity):
                return False
            log.info("checking if max capacity is updated in ASG..")
            max_updated, _ = self.wait_till_cloud_update(
                faulty_instance.asg_group,
                max_capacity,
                poll_budget,
                cloud_message_parser.type().e_MAX_CAPACITY,
            )
            return True if max_updated else False

        if not desired_capacity:
            return None

        if not ca.update_asg_desired_capacity(faulty_instance.asg_group, desired_capacity):
            log.info("set back max capacity as setting desired capacity Failed")
            return False

        log.info("checking if desired capacity is updated in ASG..")
        desired_updated, confirmed_desired = self.wait_till_cloud_update(
            faulty_instance.asg_group,
            desired_capacity,
            poll_budget,
            cloud_message_parser.type().e_DESIRED_CAPACITY,
        )
        if not desired_updated:
            # NOTE(review): max_capacity is always falsy on this path (a truthy
            # value returned above), so this message can never be logged.
            if max_capacity:
                log.info("desired capacity not updated in ASG, so revert max_cap, if changed")
            return False

        if self.check_newly_added_instance(faulty_instance, confirmed_desired, old_instances):
            return True
        return False

    def check_newly_added_instance(self, faulty_instance, desired_capacity, old_instances):
        """Wait for the instance added by a desired-capacity bump to be usable.

        Waits until the ASG's instance count equals ``desired_capacity``,
        identifies the instance that is not in ``old_instances``, waits for it
        to be InService in the cloud and then UP in the ADC.  Each wait uses a
        budget of 40 polls.

        Returns True only when the new instance reached "UP"; False otherwise.
        """
        poll_budget = 40
        log.info("let number of instances increase to match desired capacity")
        count_matched, current_instances = self.wait_till_cloud_update(
            faulty_instance.asg_group,
            desired_capacity,
            poll_budget,
            cloud_message_parser.type().e_INSTANCE_LIST,
        )
        if not count_matched:
            return False

        added_instance = self.get_new_instance(old_instances, current_instances, faulty_instance.asg_group)
        if added_instance is None:
            return False

        in_service = self.wait_for_status_change_in_cloud(
            faulty_instance.asg_group, added_instance, poll_budget, "InService")
        if not in_service:
            return False

        adc_status = self.wait_for_status_change_in_adc(
            faulty_instance.asg_group, poll_budget, added_instance, "UP")
        if adc_status != "UP":
            return False
        log.info("new instance is UP")
        return True

    def wait_for_status_change_in_adc(self, asg_name, max_iterations, instance, expected_status):
        """Poll the ADC until ``instance`` reports ``expected_status``.

        Args:
            asg_name: group name passed to ``local.get_status_of_instance_in_asg``.
            max_iterations: maximum number of polls, one every
                ``iteration_interval`` seconds.
            instance: server name / instance ID to query.
            expected_status: status to wait for; compared as strings.

        Returns:
            The last status read (None if no poll ran).  Callers compare it
            with ``expected_status`` to tell success from timeout.
        """
        status = None
        try:
            # Require a positive budget: a negative or fractional value (which
            # remove_instance can produce with true division) is truthy and
            # would otherwise count down past zero forever.
            while (max_iterations and max_iterations > 0
                   and str(status) != str(expected_status)):
                max_iterations = max_iterations - 1
                time.sleep(iteration_interval)
                status = local.get_status_of_instance_in_asg(instance, asg_name)
                log.debug("instance status : "+str(status))
            return status
        except Exception:
            log.info(traceback.format_exc())
            return status

    def get_new_instance(self, old_instances, new_instances, asg_name):
        """Return the first entry of ``new_instances`` absent from ``old_instances``.

        Args:
            old_instances: instances known before the capacity change.
            new_instances: instances seen after the capacity change.
            asg_name: used only in the log message when nothing new is found.

        Returns:
            The new instance, or None when every instance was already known
            or when the inputs cannot be iterated/searched.
        """
        try:
            new_instance = next(
                (candidate for candidate in new_instances if candidate not in old_instances),
                None,
            )
        except Exception:
            log.info(traceback.format_exc())
            return None

        # Previously "nothing new" was only caught as an UnboundLocalError by
        # the broad except; handle it explicitly.
        if new_instance is None:
            log.info("no newly added instance found in %s", asg_name)
            return None
        log.info("newly added instance is :"+str(new_instance))
        return new_instance

    def trigger_cl_alarm(self, faulty_instance):
        """Pulse the cloud adapter alarm: OK, then ALARM, then OK again.

        Each state change carries the reason 'SLOW_SERVER' and a JSON payload
        with the faulty instance ID ('slow_server') and its ASG ('asg_name').
        ``iteration_interval`` seconds pass between the state changes.

        Returns False when the first OK or the ALARM update fails; a failure
        of the final OK is only logged.  Returns True otherwise.
        """
        log.info("triggering cloud adapter alarm for %s in asg:%s"%(faulty_instance.instanceID,faulty_instance.asg_group))
        alarm_name = self.alarm.name
        reason_payload = json.dumps({
            'slow_server' : faulty_instance.instanceID,
            'asg_name' : faulty_instance.asg_group,
        })

        def set_alarm_state(state):
            return ca.update_alarm_state(alarm_name, state, 'SLOW_SERVER', reason_payload)

        if not set_alarm_state('OK'):
            log.debug("failed to set cloudadapter ALARM to OK")
            return False
        time.sleep(iteration_interval)

        if not set_alarm_state('ALARM'):
            log.debug("failed to set cloudadapter-alarm to ALARM state")
            return False
        time.sleep(iteration_interval)

        if not set_alarm_state('OK'):
            log.debug("failed to set cloudadapter ALARM back to OK. But should not be an issue")
        return True

    def get_drain_time(self, asg_name):
        """Look up the drain time configured for ``asg_name``.

        Reads ``RainmanConfig().get_config()`` and searches its 'groups' list
        for the entry whose 'name' equals ``asg_name``.

        Returns:
            The group's 'drain_time' as a float, or 0 when there is no config,
            no matching group, or an error occurs.
        """
        rainman_settings = RainmanConfig().get_config()
        if not rainman_settings:
            log.info("no rainman config to fetch drain_time")
            return 0

        try:
            for group in rainman_settings['groups']:
                if group["name"] != asg_name:
                    continue
                drain_time = group["drain_time"]
                log.debug("rainman drain time is "+str(drain_time)+" for "+group["name"])
                return float(drain_time)
        except Exception:
            log.info(traceback.format_exc())
            return 0

        log.info("unable to get rainman drain time. returning 0")
        return 0

    #only in case max_cap is changed by us, we have to wait till the
    #faulty instance is removed, because reverting max_cap to N while
    #desired_cap is N+1 will trigger termination of another instance.
    def remove_instance(self, faulty_instance):
        """Trigger removal of the faulty instance and wait until it is gone.

        Pulses the cloud adapter alarm, sleeps for the group's drain time,
        then waits for the instance to disappear from the ADC (logged only)
        and from the cloud.

        Returns:
            True when the instance is no longer present in the cloud; False
            when the alarm could not be triggered or the wait timed out.
        """
        if not self.trigger_cl_alarm(faulty_instance):
            return False
        drain_time = self.get_drain_time(faulty_instance.asg_group)
        time.sleep(float(drain_time))
        # Buffer of 40*iteration_interval sec on top of the drain time.
        # Floor division keeps the poll budget an integer; with "/" it is a
        # float on Python 3, and a fractional budget never reaches exactly 0
        # in the countdown loops.
        max_iterations = 40 + int(drain_time) // iteration_interval
        if (self.wait_for_status_change_in_adc(faulty_instance.asg_group,
                                               max_iterations,
                                               faulty_instance.instanceID,
                                               'server_not_found') == 'server_not_found'):
            log.info("faulty_instance %s is removed in ADC.." % faulty_instance.instanceID)
        if self.wait_for_status_change_in_cloud(faulty_instance.asg_group,
                                                faulty_instance.instanceID,
                                                max_iterations,
                                                "remove"):
            log.info("faulty_instance %s is removed in Cloud" % faulty_instance.instanceID)
            return True
        log.info("failed to remove faulty-instance "+str(faulty_instance.instanceID)+". revert all changes")
        return False

    def change_policy_alarm_status(self, asg_name, status):
        """Enable or disable every alarm attached to the ASG's scaling policies.

        Args:
            asg_name: ASG whose policies are described.
            status: True to enable the alarms, False to disable them.  Any
                other value makes the call return False.

        Returns:
            True when every alarm was switched.  False when the policies
            cannot be described, have no alarms, ``status`` is invalid, an
            exception occurs, or one alarm fails to switch; in that last case
            the alarms already switched by this call are switched back
            (best effort).
        """
        policies_description = ca.describe_asg_policies(asg_name)
        if not policies_description:
            log.info("change_policy_alarm_status : ca.describe_asg_policies returned Null")
            return False

        policy_alarm_list = cloud_message_parser().do_parse(
                                cloud_message_parser().type.e_POLICY_ALARM,
                                policies_description)
        if not policy_alarm_list:
            log.info("change_policy_alarm_status : policies have no alarm")
            return False

        try:
            if status == True:
                apply_change, undo_change = ca.enable_alarm, ca.disable_alarm
                verb, undo_verb = "enabling", "re-disabling"
            elif status == False:
                apply_change, undo_change = ca.disable_alarm, ca.enable_alarm
                verb, undo_verb = "disabling", "re-enabling"
            else:
                return False

            switched = []
            for alarm in policy_alarm_list:
                log.info("%s %s", verb, alarm)
                if apply_change(alarm):
                    switched.append(alarm)
                    continue

                # Roll back exactly the alarms switched so far, newest first.
                # The old code indexed into the alarm *name* (alarm[index-1])
                # and ran one step too far, wrapping around to index -1.
                for previous in reversed(switched):
                    log.info("%s %s", undo_verb, previous)
                    if not undo_change(previous):
                        log.info("failed %s %s", undo_verb, previous)
                return False
        except Exception:
            log.info(traceback.format_exc())
            return False
        return True

#utility class/function. can be moved to a new file
class cloud_message_parser():
    """
        Utility that pulls individual fields out of cloud API responses,
        polled queue messages and the rainman config dictionary.

        Callers pick the field with one of the identifiers declared in the
        nested `type` class and hand it to do_parse() together with the
        response to inspect.
    """

    class type():
        """
            Identifiers of the fields do_parse() knows how to extract.

            The values are plain, unique integers; they are only meant to
            be compared against each other inside do_parse().
            e_INTSTANCE_ID is declared but has no parser, so do_parse()
            returns None for it.
        """
        e_INTSTANCE_ID = 1
        e_POLICYLIST_OF_ASG = 2
        e_TAGS = 3
        e_DESIRED_CAPACITY = 4
        e_MAX_CAPACITY = 5
        e_RECEIPT_HANDLE = 6
        e_ASG_NAME = 7
        e_AVAILABILITY_ZONE = 8
        e_SCALING_STATUS = 9
        e_SNS_ARN = 10
        e_INSTANCE_LIST = 11
        e_INSTANCE_DETAILS = 13
        e_SQS_ARN = 14
        e_POLICY_ALARM = 15
        e_AZ_INSTANCE_MAP = 16
        e_QUEUE_URL = 17
        e_AZ_LIST = 18

    def __init__(self):
        # Tag keys/values looked up in message['detail']['tags'].
        self.tagStatusKey = 'CitrixADC:faultyServer:status'
        self.tagStatusValue = 'True'
        self.tagAzKey = 'CitrixADC:faultyServer:ProcessingUnitAZ'
        self.tagListenKey = 'CitrixADC:faultyServer:ListenDetails'

    def do_parse(self, type, response):
        """
            Extract the field identified by `type` from `response`.

            type     : one of the identifiers of cloud_message_parser.type.
            response : the dictionary to inspect (ASG description, policy
                       description, polled messages or rainman config,
                       depending on `type`).

            Returns the extracted value, or None when it cannot be
            extracted or `type` has no parser. e_TAGS is the exception: it
            always returns a 3-tuple (faulty instance id, processing AZ,
            listen details), which is (None, None, None) on failure.

            ASG responses are expected to come from describing a single
            ASG by name, hence only element [0] is inspected.
        """
        if not type:
            return None

        kinds = self.type
        handlers = {
            kinds.e_DESIRED_CAPACITY: self._parse_desired_capacity,
            kinds.e_AZ_LIST: self._parse_az_list,
            kinds.e_INSTANCE_LIST: self._parse_instance_list,
            kinds.e_INSTANCE_DETAILS: self._parse_instance_details,
            kinds.e_POLICYLIST_OF_ASG: self._parse_policy_names,
            kinds.e_TAGS: self._parse_tags,
            kinds.e_MAX_CAPACITY: self._parse_max_capacity,
            kinds.e_RECEIPT_HANDLE: self._parse_receipt_handle,
            kinds.e_ASG_NAME: self._parse_asg_name,
            kinds.e_AVAILABILITY_ZONE: self._parse_availability_zone,
            kinds.e_SCALING_STATUS: self._parse_scaling_status,
            kinds.e_SNS_ARN: self._parse_sns_arn,
            kinds.e_QUEUE_URL: self._parse_queue_url,
            kinds.e_SQS_ARN: self._parse_sqs_arn,
            kinds.e_POLICY_ALARM: self._parse_policy_alarms,
            kinds.e_AZ_INSTANCE_MAP: self._parse_az_instance_map,
        }
        handler = handlers.get(type)
        if handler is None:
            return None
        return handler(response)

    def _load_inner_message(self, message):
        """Decode the JSON stored under "Message" inside the JSON body of a polled message."""
        return json.loads(json.loads(message['Body'])["Message"])

    def _parse_desired_capacity(self, response):
        """Return DesiredCapacity of the first ASG, or None on any error."""
        try:
            asg = response["AutoScalingGroups"][0]
            log.info("[desiredCap]: " + str(asg["DesiredCapacity"]))
            return asg["DesiredCapacity"]
        except Exception:
            log.info(traceback.format_exc())
            log.info("exception in parsing desired capacity")
            return None

    def _parse_az_list(self, response):
        """Return AvailabilityZones of the first ASG, or None on any error."""
        try:
            az_list = response["AutoScalingGroups"][0]["AvailabilityZones"]
            log.debug("%d number of az are present" % len(az_list))
            return az_list
        except Exception:
            log.info(traceback.format_exc())
            return None

    def _parse_instance_list(self, response):
        """Return the instance ids of the first ASG, or None on any error."""
        try:
            asg = response["AutoScalingGroups"][0]
            return [instance["InstanceId"] for instance in asg["Instances"]]
        except Exception:
            log.info(traceback.format_exc())
            log.info("exception while getting instances")
            return None

    def _parse_instance_details(self, response):
        """Return the raw Instances list of the first ASG, or None on any error."""
        try:
            return response["AutoScalingGroups"][0]["Instances"]
        except Exception:
            log.info(traceback.format_exc())
            log.info("exception while getting instance deatils")
            return None

    def _parse_policy_names(self, response):
        """Return the PolicyName of every scaling policy, or None on any error."""
        try:
            return [policy["PolicyName"] for policy in response["ScalingPolicies"]]
        except Exception:
            log.info(traceback.format_exc())
            log.info("exception while getting policies")
            return None

    def _parse_tags(self, response):
        """
            Return (faulty instance id, processing AZ, listen details) from
            a tag-change message, or (None, None, None) when the response
            carries no usable faulty-server tag change.
        """
        no_result = (None, None, None)
        if 'Messages' not in response:
            return no_result

        inner_message = None
        detailtype = None
        try:
            # Every message is decoded, but only the last one is acted on.
            # NOTE(review): the receipt-handle/ASG-name/AZ parsers use the
            # FIRST message instead - confirm callers poll one message at
            # a time.
            for message in response['Messages']:
                inner_message = self._load_inner_message(message)
                detailtype = inner_message['detail-type']
        except KeyError:
            log.info("KeyError in Message while retreiving InstanceID")
            log.info(traceback.format_exc())
            return no_result
        except Exception:
            log.info("Exception in Message while retreiving InstanceID")
            log.info(traceback.format_exc())
            return no_result

        # detailtype stays None when 'Messages' is an empty list.
        if detailtype != 'Tag Change on Resource':
            return no_result

        try:
            listenDetails = None
            tags = inner_message['detail']['tags']
            faulty_tag = tags[self.tagStatusKey]
            processing_az = tags[self.tagAzKey]
            log.info("faulty message detected with tag: "+
                   str(faulty_tag)+" and AZ: "+processing_az)

            if self.tagListenKey in tags:
                listenDetails = tags[self.tagListenKey]
                log.info("faulty message detected with tag:"+listenDetails)
            else:
                log.debug("no listenDetails")

            if faulty_tag == self.tagStatusValue:
                resource = inner_message['resources'][0]
                log.debug("resource "+resource)
                faulty_instance = resource.split('instance/')[1]
                log.info("extracted faulty instance from tag as : "+faulty_instance)
                return faulty_instance, processing_az, listenDetails
        except KeyError:
            log.info("KeyError in Tags")
            log.info(traceback.format_exc())
            return no_result
        except Exception:
            log.info("Exception in Tags")
            log.info(traceback.format_exc())
            return no_result
        return no_result

    def _parse_max_capacity(self, response):
        """Return MaxSize of the first ASG, or None on any error."""
        try:
            asg = response["AutoScalingGroups"][0]
            log.info("[maxcap]: " + str(asg["MaxSize"]))
            return asg["MaxSize"]
        except Exception:
            log.info("exception while retreiving max capacity")
            log.info(traceback.format_exc())
            return None

    def _parse_receipt_handle(self, response):
        """Return the ReceiptHandle of the first message, or None."""
        try:
            for message in response['Messages']:
                receiptHandle = message['ReceiptHandle']
                log.debug("receiptHandle : "+receiptHandle)
                return receiptHandle
        except KeyError:
            log.info("KeyError in ReceiptHandle Message")
            log.info(traceback.format_exc())
            return None
        return None

    def _parse_asg_name(self, response):
        """Return the 'aws:autoscaling:groupName' tag of the first message, or None."""
        try:
            # The lookup of 'Messages' is inside the try so that a response
            # without that key yields None instead of raising.
            for message in response['Messages']:
                tags = self._load_inner_message(message)['detail']['tags']
                log.debug("do_parse: tags: "+str(tags))
                if 'aws:autoscaling:groupName' in tags:
                    return tags['aws:autoscaling:groupName']
                log.info("asg name not present in message")
                return None
        except Exception:
            log.info(traceback.format_exc())
            log.info("Exception caught in e_ASG_NAME")
            return None
        return None

    def _parse_availability_zone(self, response):
        """Return the 'region' field of the first message, or None."""
        try:
            for message in response['Messages']:
                az = self._load_inner_message(message)['region']
                log.debug("parsed AZ as : "+str(az))
                return az
        except Exception:
            log.info(traceback.format_exc())
            return None
        return None

    def _parse_scaling_status(self, response):
        """Return StatusCode of the first activity, or None when it is missing."""
        try:
            status = response["Activities"][0]["StatusCode"]
            log.info("scaling status : ")
            log.info((status))
            return status
        except KeyError:
            log.info("KeyError in e_scaling_status")
            log.info(traceback.format_exc())
            return None
        except IndexError:
            # An empty activity list is reported like a missing key.
            log.info("no activity found in e_scaling_status")
            log.info(traceback.format_exc())
            return None

    def _parse_queue_arns(self, response, arn_key):
        """Return `arn_key` of every entry of response['event_queues'], or None."""
        arn_list = []
        try:
            if "event_queues" in response:
                for event_queue in response['event_queues']:
                    arn = json.loads(event_queue['details'])[arn_key]
                    log.debug(arn_key + " " + arn)
                    arn_list.append(arn)
                return arn_list
        except Exception:
            log.info(traceback.format_exc())
            return None
        log.info("clouldnt find " + arn_key + " ")
        return None

    def _parse_sns_arn(self, response):
        """Return the list of "sns_arn" values of the event queues, or None."""
        return self._parse_queue_arns(response, "sns_arn")

    def _parse_sqs_arn(self, response):
        """Return the list of "sqs_arn" values of the event queues, or None."""
        return self._parse_queue_arns(response, "sqs_arn")

    def _parse_queue_url(self, response):
        """Return "sqs_url" of the last event queue, or None."""
        url = None
        try:
            if "event_queues" in response:
                for event_queue in response['event_queues']:
                    url = json.loads(event_queue['details'])["sqs_url"]
            return url
        except Exception:
            log.info(traceback.format_exc())
            return None

    def _parse_policy_alarms(self, response):
        """Return the AlarmName of every alarm of every scaling policy, or None."""
        try:
            return [alarm['AlarmName']
                    for policy in response['ScalingPolicies']
                    for alarm in policy['Alarms']]
        except Exception:
            log.info(traceback.format_exc())
            return None

    def _parse_az_instance_map(self, response):
        """
            Group the instances of the first ASG by availability zone.

            Returns a list ordered by first appearance of each zone, e.g.
            [{'AvailabilityZone': 'az1', 'InstanceId': ['a', 'b']},
             {'AvailabilityZone': 'az2', 'InstanceId': ['c', 'd']}]
            or None when the ASG has no instances or on any error.
        """
        try:
            instance_list = response["AutoScalingGroups"][0]["Instances"]
            if not instance_list:
                log.info("no instances present in the ASG")
                return None

            zone_list = []
            zone_by_name = {}
            for instance in instance_list:
                az = instance['AvailabilityZone']
                inst_id = instance['InstanceId']
                zone_info = zone_by_name.get(az)
                if zone_info is None:
                    zone_info = {'AvailabilityZone': az, 'InstanceId': []}
                    zone_by_name[az] = zone_info
                    zone_list.append(zone_info)
                zone_info['InstanceId'].append(inst_id)
            log.info(zone_list)
            log.debug([zone['AvailabilityZone'] for zone in zone_list])
            return zone_list
        except Exception:
            log.info(traceback.format_exc())
            return None

class Rule():
    """
        Handles the rule that forwards tag-change events to the
        cloudAdapter SNS topic subscribed to it.
    """
    def __init__(self):
        parser = cloud_message_parser()
        # Match only ec2 instance tag changes carrying the faulty-server
        # status tag with its expected value.
        tag_filter = {parser.tagStatusKey : [parser.tagStatusValue]}
        detail_filter = {"service": ["ec2"], "resource-type": [  "instance"], "tags": tag_filter}
        event_pattern = {"source": ["aws.tag"], "detail-type": ["Tag Change on Resource"], "detail": detail_filter}

        self.rule_name = "CLOUDADAPTER_RULE"
        self.rule_pattern = json.dumps(event_pattern)
        self.rule_state = 'ENABLED'

    def setup(self, sns_arn):
        """
            Create the rule when ca.get_rule() does not report it, then
            assign `sns_arn` as a target of the rule.

            Returns True when the target was assigned, False when rule
            creation or target assignment failed.
        """
        if ca.get_rule(self.rule_name):
            log.debug("Rule is already created by another cluster")
        else:
            log.debug("Rule is not created yet by any cluster")
            created = ca.create_rule(self.rule_name, self.rule_pattern, self.rule_state)
            if not created:
                log.info("Failed to create Rule")
                return False

        if ca.assign_target_to_rule(self.rule_name, sns_arn):
            log.info("my target assigned to rule")
            return True
        log.info("Failed to assign_target_to_rule")
        return False

    def __del__(self):
        self.rule_name = self.rule_pattern = self.rule_state = None

class RainmanConfig():
    """
        Reads the rainman config file and extracts queue/topic details
        from it.
    """

    def __init__(self):
        self.config_file = '/nsconfig/rainman.conf'
        self.config = {}

    def __del__(self):
        self.config_file = None
        self.config = {}

    def get_config(self):
        """
            Load the JSON stored in self.config_file.

            Returns the parsed config (also stored in self.config), or
            None when the file cannot be opened, locked or parsed; in that
            case self.config is reset to {}.
        """
        try:
            # The context manager closes the file on every path, including
            # the one where open() itself succeeds but a later step fails.
            with open(self.config_file, 'r') as infile:
                # Non-blocking exclusive lock: flock raises instead of
                # waiting when the lock cannot be taken immediately.
                fcntl.flock(infile, fcntl.LOCK_EX | fcntl.LOCK_NB)
                self.config = json.loads(infile.read())
            return self.config
        except Exception:
            log.info(traceback.format_exc())
            self.config = {}
            return None

    def get_sqs_arn(self, config):
        """Return what cloud_message_parser extracts for e_SQS_ARN from `config`."""
        sqs_arn = cloud_message_parser().do_parse(
                         cloud_message_parser().type().e_SQS_ARN,
                         config)
        return sqs_arn

    def get_sns_arn(self, config):
        """Return what cloud_message_parser extracts for e_SNS_ARN from `config`."""
        sns_arn = cloud_message_parser().do_parse(
                          cloud_message_parser().type().e_SNS_ARN,
                          config)
        return sns_arn

    def get_queue_url(self, config):
        """Return what cloud_message_parser extracts for e_QUEUE_URL from `config`."""
        url = cloud_message_parser().do_parse(
                          cloud_message_parser().type().e_QUEUE_URL,
                          config)
        return url

class alarm():
    """
        Holds the cloud adapter metric alarm settings together with the
        rainman SQS/SNS details read from the rainman config.
    """

    def __init__(self):
        self.disabled_alarms = []
        self.name = "ADC_CLOUDADAPTER_ALARM"
        self.alarm_asg_name = "CITRIX_ADC_DUMMY_ASG_NAME"
        self.rain_conf = RainmanConfig()
        self.rain_sqs_arn, self.rain_sns_arn = [], []
        self.rain_subscription_arn = None
        self.rain_queue_url = None

    def fetch_rainman_details(self):
        """
            Refresh the rainman SQS arn, SNS arn and queue url from the
            rainman config. Returns True only when the config is available
            and both the SQS and the SNS arn were found.
        """
        rainman_config = self.rain_conf.get_config()
        if not rainman_config:
            log.debug("rainman config not available yet")
            return False

        conf = self.rain_conf
        self.rain_sqs_arn = conf.get_sqs_arn(rainman_config)
        self.rain_sns_arn = conf.get_sns_arn(rainman_config)
        self.rain_queue_url = conf.get_queue_url(rainman_config)

        if self.rain_sqs_arn and self.rain_sns_arn:
            log.debug("got rainman sqs: %s"% self.rain_sqs_arn[0])
            return True
        log.debug("rainman SqS/SnS not available yet to create alarm")
        return False

    def __del__(self):
        self.disabled_alarms = []
        self.name = ""
        self.alarm_asg_name = ""
        del self.rain_conf
        self.rain_sns_arn = []
        self.rain_subscription_arn = None

    def get_actionPrefix_metric_alarm(self, action_prefix):
        """
            Return the 'MetricAlarms' entry of ca.get_alarms(action_prefix),
            or None when it is absent or the response cannot be inspected.
        """
        response = ca.get_alarms(action_prefix)
        try:
            if 'MetricAlarms' in response:
                metric_alarms = response['MetricAlarms']
                log.info(metric_alarms)
                return metric_alarms
            log.info("no Metric Alarms for this ASG")
            return None
        except Exception:
            log.info(traceback.format_exc())
            return None

    def get_cl_alarm(self):
        """
            Return the metric alarm whose AlarmName equals self.name, or
            None when no rainman SQS arn is known or no such alarm exists.
        """
        if not self.rain_sqs_arn:
            return None
        for metric_alarm in self.get_actionPrefix_metric_alarm(None) or []:
            if metric_alarm['AlarmName'] == self.name:
                return metric_alarm
        return None

    def setup(self, alarm_sns):
        """
            Make sure the cloud adapter alarm exists with `alarm_sns` among
            its AlarmActions, creating it otherwise. Returns False when the
            rainman details are unavailable or the alarm creation failed.
        """
        if not self.fetch_rainman_details():
            return False

        log.info("checking if alarm already exists..")
        if self.does_alarm_param_exist('AlarmActions', [alarm_sns]):
            return True

        log.debug("no alarm, so creating new one")
        if ca.create_metric_alarm(self.name, [alarm_sns], self.alarm_asg_name):
            log.info("cloud adapter alarm created and it has cloudadapter SNS as notif target")
            return True
        log.info("failed to create alarm!")
        return False

    def does_alarm_param_exist(self, param, value):
        """
            Tell whether the cloud adapter alarm exists and carries `param`.
            For param 'AlarmActions' every item of `value` must also be
            present in the alarm's actions.
        """
        cl_alarm = self.get_cl_alarm()
        if not cl_alarm:
            return False

        log.debug("got cl alarm, now check if it has the global sns")
        try:
            fetched_param = cl_alarm[param]
            log.info(fetched_param)
            if param == 'AlarmActions' and any(item not in fetched_param for item in value):
                log.info("my global sns not found in cl alarm")
                return False
            log.debug("cl alarm has my global sns")
            return True
        except Exception:
            log.info(traceback.format_exc())
            return False

class sqs_queue():
    """
        Wraps the cloud adapter's queue in the cloud's Simple Queue
        Service: creation, arn lookup and polling.
    """
    def __init__(self, name_suffix, wait_time=10, max_msg=10):
        self.queue_name = "CLOUDADAPTER_QUEUE_%s" % (str(name_suffix))
        # Filled in by setup().
        self.queue_url = self.queue_arn = None
        self.rule_subscription_arn = None
        # Passed to ca.poll_sqs_queue() on every poll().
        self.WaitTimeSeconds, self.MaxNumberOfMessages = wait_time, max_msg

    def setup(self):
        """
            Create the queue and record its url and arn. Returns False when
            the queue cannot be created or its arn cannot be fetched.
        """
        created_queue = ca.create_sqs_queue(self.queue_name)
        if not created_queue:
            log.info("failed to create SQS queue.")
            return False

        self.queue_url = created_queue.url
        self.queue_arn = ca.get_sqs_queue_attributes(self.queue_url, 'QueueArn')
        if self.queue_arn:
            return True
        log.info("failed to fetch arn of SQS queue!")
        return False

    def poll(self):
        """Return the result of polling the queue through ca.poll_sqs_queue()."""
        return ca.poll_sqs_queue(self.queue_url,
                                 self.MaxNumberOfMessages,
                                 self.WaitTimeSeconds)

    def __del__(self):
        self.queue_name = self.queue_url = self.queue_arn = None
        self.rule_subscription_arn = None
        self.WaitTimeSeconds = self.MaxNumberOfMessages = 0

class sns_topic():
    """
        Creates the two SNS topics used by the cloud adapter (one for the
        rule, one for the alarm) and remembers their arns.
    """
    def __init__(self):
        self.rule_topic_name = "CLOUDADAPTER_RULE_TOPIC"
        self.alarm_topic_name = "CLOUDADAPTER_ALARM_TOPIC"
        self.rule_sns_arn = self.alarm_sns_arn = None

    def __del__(self):
        self.rule_topic_name = self.alarm_topic_name = None
        self.sns_topic_arn = None

    def setup(self):
        """
            Create the rule topic, then the alarm topic, storing each
            'TopicArn'. Returns False as soon as one creation does not
            yield a 'TopicArn'.
        """
        rule_topic = ca.create_sns_topic(self.rule_topic_name)
        if not (rule_topic and 'TopicArn' in rule_topic):
            log.info("failed to create RULE SNS topic!")
            return False
        self.rule_sns_arn = rule_topic['TopicArn']

        alarm_topic = ca.create_sns_topic(self.alarm_topic_name)
        if not (alarm_topic and 'TopicArn' in alarm_topic):
            log.info("failed to create ALARM  SNS topic!")
            return False
        self.alarm_sns_arn = alarm_topic['TopicArn']
        return True

class faulty_server():
    """
        Plain record describing the server currently being self-healed.
    """
    # Fields that are None whenever no faulty server is being handled.
    #   asg_group                       : specific to ASG self healing
    #   faulty_instance_launch_template : specific to IP type self healing
    _EMPTY_FIELDS = ("instanceID", "name", "az", "type", "asg_group",
                     "faulty_instance_launch_template", "servicegroup")

    def __init__(self):
        # A new record starts out in the same state cleanup() produces.
        self.cleanup()

    def cleanup(self):
        """Reset every field to its initial value (None, port 0)."""
        for field_name in self._EMPTY_FIELDS:
            setattr(self, field_name, None)
        self.port = 0

class cloud_adapter():
    """
        This class handles all logical functionalities of
        Cloud Adapter process
    """

    def __init__(self, poll_time, platform):
        """
            poll_time : seconds slept between two iterations of schedule().
            platform  : object whose `name` selects the cloud manager.
        """
        # Only 'AWS' has a cloud manager; for any other platform name
        # self.cm is left unset.
        if platform.name=='AWS':
            self.cm = aws_cloud_manager()
        self.poll_period = poll_time
        self.faulty_instance = faulty_server()

    def initialize(self, conf_file):
        """Prepare the cloud setup and persist it to `conf_file`; False on failure."""
        if not self.cm.prepare_setup():
            return False
        if not self.cm.write_to_cl_conf(conf_file):
            return False
        return True

    def restore_faulty_server_IP(self, faulty_instance):
        """
            Bring the faulty server back from its saved launch template,
            wait for it to come UP in the service group and then remove
            the N+1 instance created during healing.

            Returns True when the server was restored, False otherwise.
        """
        log.info("restore_faulty_server_IP:bringing back faulty_instance with same IP")
        name = "cloudadaptr_%s"%faulty_instance.instanceID
        if not self.cm.run_instance_from_LaunchTemplateData(name, faulty_instance.faulty_instance_launch_template, False):
            log.info("failed to bring up faulty server")
            self.cm.cl_created_new_instance.cleanup()
            return False

        log.info("tagging n+1 instance to inform other nodes to enable faulty sevrer")
        self.cm.tag_instance(self.cm.enable_server_tag, "True", self.cm.cl_created_new_instance.instanceID)

        log.info("deleting arp and enabling server in SG")
        arp_deleted = True
        if not self.cm.enable_server_in_servicegroup(faulty_instance):
            # we should leave the server in SG for ADS config match
            return False
        if not local.remove_arp_entry(faulty_instance.name):
            log.info("failed to delete ARP entry. Faulty Server will take a long time to become UP")
            arp_deleted = False

        # The UP state is only waited for when the ARP entry was deleted.
        if arp_deleted:
            time.sleep(30)
            max_iterations = 40
            if (self.cm.wait_for_status_change_in_adc(faulty_instance.servicegroup, max_iterations, faulty_instance.name, 'UP')!='UP'):
                log.info("New server created with same IP as faulty server, failed to come UP in servicegroup")
                self.cm.cl_created_new_instance.cleanup()
                return False

        log.info("finally, remove n+1 instance and restore config")
        if not self.cm.remove_ip_type_instance(self.cm.cl_created_new_instance, False):
            log.info("failed to remove N+1 server from cloud, but continue to unbind in ADC")
        if not self.cm.unbind_server_from_servicegroup(self.cm.cl_created_new_instance):
            log.info("failed to unbind N+1 server from SG")
            #tested that if n+1 is bound to SG, edit/deletion of app has no issues from ADS.
            #Hence, no need to return False here.
        self.cm.cl_created_new_instance.cleanup()
        return True

    def remove_faulty_instance(self, faulty_instance):
        """
            Remove the faulty instance from the cloud.

            For ASG instances a failed removal triggers removal of the N+1
            instance instead, and the original max capacity is restored
            when one was saved. Returns True only when the faulty instance
            itself was removed (and, if needed, max capacity restored).
        """
        if faulty_instance.type == "OWNER_CCO_IP":
            if not self.cm.remove_ip_type_instance(faulty_instance, True):
                return False
            return True

        removed = self.cm.remove_instance(faulty_instance)
        if not removed:
            log.info("Since instance removal failed, revert from N+1")
            if self.cm.remove_instance(self.cm.cl_created_new_instance):
                log.debug("successfully reverted to N")
            else:
                log.info("failed to revert from  n+1!!")

        if self.cm.original_max_capacity:
            if not self.cm.update_capacity(faulty_instance, 0,self.cm.original_max_capacity, None):
                log.info("failed to revert max capacity")
                self.cm.original_max_capacity = 0
                return False
            self.cm.original_max_capacity = 0

        if removed:
            return True
        return False

    def create_new_instance(self, faulty_instance):
        """
            Launch the N+1 instance that stands in for the faulty one and
            hook it up (attach to the ASG, or bind to the service group
            for OWNER_CCO_IP servers).

            Returns True on success or when the instance details report
            'Complete'; False on any failure or exception.
        """
        if faulty_instance.type == "OWNER_CCO_IP":
            response = self.get_ip_instance_details(faulty_instance)
        else:
            response = self.cm.get_asg_instance_details(faulty_instance)

        if not response:
            return False
        if('Complete' in response):
            return True
        if 'sample_instance' not in response or 'LaunchTemplateData' not in response:
            return False

        try:
            name = "cloudadaptr_%s"%response['sample_instance']
            InstanceId = self.cm.run_instance_from_LaunchTemplateData(name, response['LaunchTemplateData'], True)
            if not InstanceId:
                return False

            if(faulty_instance.type=="ASG"):
                if not self.cm.attach_instance_to_asg(InstanceId, faulty_instance, response['old_instances'], response['desired_capacity']):
                    return False
            elif(faulty_instance.type=="OWNER_CCO_IP"):
                log.info("bind server to servicegroup")
                new_server_ip = self.cm.bind_server_to_servicegroup(InstanceId, faulty_instance)
                if not new_server_ip:
                    log.info("failed to bind n+1 server to service group")
                    return False
                log.info("binding successfull. Now wait till it comes UP")
                max_iterations=40
                if self.cm.wait_for_status_change_in_adc(faulty_instance.servicegroup,max_iterations,new_server_ip,"UP")=="UP":
                    log.info("bound server in SG is UP")
                else:
                    log.info("bound server in SG failed to come UP.Hence remove from ADC and cloud")
                    if not local.remove_server(new_server_ip):
                        log.info("failed to remove server also! ")
                    if not ca.terminate_ec2_instances([InstanceId]):
                        log.info("failed to remove instance from cloud also!")
                    return False
            if not ca.delete_template(name):
                log.info("failed to delete launch template")

            # The N+1 record starts as a copy of the faulty one and then
            # gets the identity of the newly launched instance.
            self.cm.cl_created_new_instance = copy.deepcopy(faulty_instance)
            self.cm.cl_created_new_instance.instanceID = InstanceId
            if faulty_instance.type=="OWNER_CCO_IP":
                self.cm.cl_created_new_instance.name = new_server_ip
                log.info("tag new instance to alert other CCOs")
                if not self.cm.tag_instance(self.cm.n_plus_1_tag,faulty_instance.instanceID,InstanceId):
                    log.info("unable to tag. Revert to original config")
                    if not local.remove_server(new_server_ip):
                        log.info("failed to remove server! ")
                    if not ca.terminate_ec2_instances([InstanceId]):
                        log.info("failed to remove instance from cloud also!")
                    return False
            return True
        except Exception:
            log.info(traceback.format_exc())
            return False

    def get_ip_instance_details(self, faulty_instance):
        """
            Fetch the launch template data of an IP type faulty instance
            and keep a copy of it on `faulty_instance`.

            Returns {'sample_instance': ..., 'LaunchTemplateData': ...},
            or None when the launch template data is unavailable.
        """
        LaunchTemplateData = self.cm.get_ip_instance_LaunchTemplateData(faulty_instance)
        if LaunchTemplateData:
            faulty_instance.faulty_instance_launch_template = copy.deepcopy(LaunchTemplateData)
        else:
            log.info("get_ip_instance_details : failed to get LaunchTemplateData")
            return None

        ret = {'sample_instance': faulty_instance.instanceID,
               'LaunchTemplateData': LaunchTemplateData}
        return ret

    def reset_self_heal_changes(self, faulty_instance):
        """
            Undo the temporary self-heal state: for ASG instances re-enable
            the policy alarms and forget the disabled ones; in every case
            reset `faulty_instance`.
        """
        if faulty_instance.type != "ASG":
            faulty_instance.cleanup()
            return
        if not self.cm.change_policy_alarm_status(faulty_instance.asg_group, True):
            log.info("reset_self_heal_changes FAILED at enabling policy alarms")
        del self.cm.alarm.disabled_alarms[:]
        faulty_instance.cleanup()

    def handle_init_process(self, cl, proc_handle):
        """
            Initialise `cl`, retrying until it succeeds.

            With an existing config the state is restored from it and only
            re-initialised (retry every 60s) when restoring or validating
            fails; without one a fresh initialisation is retried every 20s.
        """
        while(1):
            if proc_handle.config:
                if not cl.cm.init_from_config_file(proc_handle.config) or not cl.cm.validate_init():
                    if not cl.initialize(proc_handle.config_file):
                        log.info("init failed, trying again after sleeping")
                        time.sleep(60)
                        continue
            else:
                if not cl.initialize(proc_handle.config_file):
                    log.info("init failed, trying again after sleeping")
                    time.sleep(20)
                    continue
            log.debug("handle_init_process complete")
            break

    def schedule(self):
        """
            Main loop: every poll period wait for a faulty instance, create
            its N+1 replacement, remove the faulty instance and, for
            OWNER_CCO_IP servers, restore the server with its original IP.
            A failing step drops the queue message and resets the state
            before the next iteration.
        """
        while(1):
            time.sleep(self.poll_period)
            if(not self.cm.wait_for_faulty_instance(self.faulty_instance)):
                self.faulty_instance.cleanup()
                continue
            if(self.faulty_instance.type == "ASG"):
                if(self.cm.is_scaling_in_progress(self.faulty_instance.asg_group)):
                    self.faulty_instance.cleanup()
                    continue
            if(not self.create_new_instance(self.faulty_instance)):
                self.cm.delete_message_from_queue()
                self.reset_self_heal_changes(self.faulty_instance)
                self.faulty_instance.cleanup()
                continue
            if(not self.remove_faulty_instance(self.faulty_instance)):
                self.cm.delete_message_from_queue()
                self.reset_self_heal_changes(self.faulty_instance)
                self.faulty_instance.cleanup()
                continue
            if(self.faulty_instance.type == "OWNER_CCO_IP"):
                if not self.restore_faulty_server_IP(self.faulty_instance):
                    # The queue belongs to the cloud manager, as in every
                    # other branch of this loop.
                    self.cm.delete_message_from_queue()
                    self.faulty_instance.cleanup()
                    continue

            self.reset_self_heal_changes(self.faulty_instance)
            self.cm.delete_message_from_queue()
            log.info("Self healing complete")

        # Not reachable today: the loop above has no break. Kept so that a
        # future exit path still cleans up the created entities.
        self.cm.delete_cl_created_entities()
        return False

class PlatformParams():
    """
        Per-platform constants. Only 'AWS' is known; any other platform
        name leaves the instance without `name` and `pid_file`.
    """
    def __init__(self, platform_name):
        if platform_name != 'AWS':
            return
        self.name, self.pid_file = 'AWS', '/flash/nsconfig/.AWS/cloudadapter.pid'

class ProcessHandler():
    """Manage the daemon's pid file and its JSON config file.

    Constructing an instance checks that the node is a primary node and
    writes the pid file; if either step fails the process exits with code
    11.  Afterwards the config file is loaded into ``self.config``; when it
    cannot be read, an empty config file is created instead.
    """

    def __init__(self, platform):
        """Set up paths, verify the node role and load the config.

        Args:
            platform: object exposing a ``pid_file`` attribute holding the
                path of the pid file to write.
        """
        self.pid_file = platform.pid_file
        self.config_file = '/nsconfig/cloudadapter.conf'
        self.config = {}
        if not self.is_primary_node() or not self.write_to_pid_file():
            self.exit_daemon(11)
        self.fetch_data_from_config_file()

    def _get_config(self):
        """Load ``self.config_file`` as JSON into ``self.config``.

        Returns:
            True on success.  On any error the failure is logged,
            ``self.config`` is reset to ``{}`` and False is returned.
        """
        try:
            # The context manager closes the file even if parsing fails.
            with open(self.config_file, 'r') as infile:
                self.config = json.loads(infile.read())
        except Exception as e:
            log.info(traceback.format_exc())
            self.config = {}
            log.info("failed to get config file: %s", e)
            return False
        return True

    def fetch_data_from_config_file(self):
        """Load the config, creating an empty config file if loading fails.

        Exits the daemon with code 11 when the file can neither be read nor
        created.
        """
        if not self._get_config():
            if not self.create_cl_config_file():
                self.exit_daemon(11)

    def write_to_pid_file(self):
        """Write the current process id to ``self.pid_file``.

        Returns:
            True when the pid was written, False when an IOError occurred
            (the error is logged).
        """
        try:
            with open(self.pid_file, "w") as fp:
                fp.write("%d" % os.getpid())
        except IOError as e:
            log.info(traceback.format_exc())
            # Pass the values as logging arguments: "a" + str(e) % x would
            # apply % to str(e) alone and raise TypeError here.
            log.info("Not able to create pid file %s error: %s", self.pid_file, e)
            return False
        log.debug("created pid file")
        return True

    def create_cl_config_file(self):
        """Write an empty JSON object to ``self.config_file``.

        Also resets ``self.config`` to ``{}`` on success.

        Returns:
            True when the file was written, False when an IOError occurred
            (the error is logged).
        """
        try:
            log.info("Creating config file %s..." % self.config_file)
            with open(self.config_file, 'w') as outfile:
                json.dump({}, outfile)
        except IOError as e:
            log.info(traceback.format_exc())
            log.info("Not able to create config file %s error: %s", self.config_file, e)
            return False
        self.config = {}
        return True

    def clear_pid_file(self):
        """Remove the pid file; a failure is logged and otherwise ignored."""
        try:
            os.remove(self.pid_file)
        except OSError:
            log.info(traceback.format_exc())

    def clear_conf_file(self):
        """Remove the config file; a failure is logged and otherwise ignored."""
        try:
            os.remove(self.config_file)
        except OSError:
            log.info(traceback.format_exc())

    def exit_daemon(self, ret_code):
        """Remove the pid file and terminate the process with ``ret_code``."""
        self.clear_pid_file()
        # sys.exit is always available, unlike the site-provided exit().
        sys.exit(ret_code)

    def is_primary_node(self):
        """Tell whether the local node state allows the daemon to run.

        Returns:
            True when ``local.get_node_config()`` reports 'Primary', 'CCO'
            or 'StandAlone'.  Otherwise the config file is rewritten as an
            empty JSON object and False is returned.
        """
        nodestate = local.get_node_config()
        if nodestate in ['Primary', 'CCO', 'StandAlone']:
            return True
        log.info("Not on a primary node. %d exiting..." % os.getpid())
        self.create_cl_config_file()
        return False

def check_running_process():
    """Report whether another cloudadapter process is already running.

    Walks over the processes returned by ``psutil.process_iter()`` and looks
    for one, other than the current process, whose command line has more
    than one element with 'python' in the first and 'cloudadapter' in the
    second.  Every match is logged.

    Returns:
        1 if at least one other matching process was found, otherwise 0.
    """
    ret = 0
    for proc in psutil.process_iter():
        try:
            pinfo = proc.as_dict(attrs=['pid', 'cmdline'])
        except psutil.NoSuchProcess:
            # The process went away while it was being inspected.
            continue

        # as_dict() may substitute None for a field it is not allowed to
        # read; treat that as an empty command line instead of failing on
        # len(None).
        cmdline = pinfo['cmdline'] or []
        log.debug("cmdline %d and pinfo[pid]=%s"%(len(cmdline),pinfo['pid']))
        if (len(cmdline) > 1
                and 'python' in cmdline[0]
                and 'cloudadapter' in cmdline[1]
                and pinfo['pid'] != os.getpid()):
            log.info("Another cloudadapter process %d is running, %d exiting..." % (pinfo['pid'], os.getpid()))
            ret = 1
    return ret

# Start-up checks, executed when the module is loaded.
supported_platforms = ['AWS']

# Exit with code 10 when another cloudadapter process was detected.
ret = check_running_process()
if ret != 0:
    exit(10)

# Exit with code 10 when the cloud platform is not in the supported list.
platform_name = cloud.get_cloud_platform()
if platform_name not in supported_platforms:
    log.info("platform not supported %s" % cloud.get_cloud_platform())
    exit(10)
log.info("AWS platform")

platform = PlatformParams(platform_name)
log.info("cloudadapter process %d starting..." % os.getpid())


"""
        Cloud Adapter is a python process that communicates
        from Netscaler to Cloud. It informs the cloud
        about the changes done in Netscaler, so cloud can also do the same
"""
def main():
    """Install the signal handlers and run the adapter cycle forever.

    ``handle_signal`` is registered for SIGTERM, SIGUSR1 and SIGUSR2.  Each
    cycle builds a fresh ProcessHandler and cloud_adapter, runs the
    adapter's init step and its schedule, and finally removes the config
    file before starting over.
    """
    for signum in (signal.SIGTERM, signal.SIGUSR1, signal.SIGUSR2):
        signal.signal(signum, handle_signal)

    while True:
        handler = ProcessHandler(platform)
        adapter = cloud_adapter(poll_time, platform)
        # The adapter is passed to its own init step as an explicit argument.
        adapter.handle_init_process(adapter, handler)
        adapter.schedule()
        handler.clear_conf_file()


if __name__ == "__main__":
    main()
