import os
import gzip
import re
import csv
import ipaddress
import argparse
import logging
import time
from urllib.parse import urlparse
from datetime import datetime, timedelta
from collections import defaultdict
from enum import Enum
from calendar import month_abbr

# Simple progress indicator using built-in modules
class ProgressIndicator:
    """Minimal single-line text progress bar built on the standard library.

    The indicator can be driven manually through :meth:`update`, used as a
    context manager (a final line is printed on exit when ``leave`` is true),
    or iterated, in which case it yields ``0 .. total - 1``.

    Args:
        total: Number of items that represent 100 %.
        desc: Label printed in front of the bar.
        unit: Unit name printed after the ``current/total`` counter.
        leave: When true, ``__exit__`` prints a final line ending in a newline.
        mininterval: Minimum number of seconds between two redraws.
    """

    def __init__(self, total, desc="Progress", unit="items", leave=True, mininterval=1.0):
        self.total = total
        self.desc = desc
        self.unit = unit
        self.leave = leave
        self.mininterval = mininterval
        self.current = 0
        self.start_time = time.time()
        self.last_print = 0  # 0 guarantees that the first update() call draws
        self.width = 50  # Progress bar width in characters

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.leave:
            self._print_progress(final=True)

    def __iter__(self):
        return self

    def __next__(self):
        if self.current >= self.total:
            raise StopIteration
        value = self.current
        self.current += 1
        return value

    def update(self, n=1):
        """Advance the counter by ``n`` and redraw if the interval elapsed or the end was reached."""
        self.current += n
        current_time = time.time()
        if current_time - self.last_print >= self.mininterval or self.current >= self.total:
            self._print_progress()
            self.last_print = current_time

    def _print_progress(self, final=False):
        """Draw the bar on the current line; ``final=True`` terminates the line."""
        if self.total == 0:
            return

        percent = min(100.0 * self.current / self.total, 100.0)
        # Clamp the fill so an overshooting counter (current > total) cannot
        # make the bar longer than ``width`` characters.
        filled = int(self.width * self.current / self.total)
        filled = max(0, min(self.width, filled))

        # Use ASCII characters for better cross-platform compatibility
        if os.name == 'nt':  # Windows
            bar = '#' * filled + '-' * (self.width - filled)
        else:  # Unix/Linux/Mac
            bar = '█' * filled + '░' * (self.width - filled)

        elapsed = time.time() - self.start_time
        if self.current > 0 and not final:
            # A coarse clock can report zero elapsed time; treat that as
            # "rate unknown" instead of dividing by zero.
            rate = self.current / elapsed if elapsed > 0 else 0
            eta = (self.total - self.current) / rate if rate > 0 else 0
            eta_str = f" ETA: {int(eta)}s" if eta > 0 else ""
        else:
            eta_str = ""

        status = f"\r{self.desc}: {percent:5.1f}%|{bar}| {self.current}/{self.total} {self.unit}{eta_str}"

        print(status, end='\n' if final else '', flush=True)

def progress_bar(iterable, desc="Progress", unit="items", leave=True, mininterval=1.0):
    """Yield every item of ``iterable``, reporting progress when its length is known.

    Iterables without ``__len__`` (generators, plain iterators) are passed
    through unchanged and nothing is printed.  For sized iterables a
    ``ProgressIndicator`` is advanced once per consumed item: the update for
    an item is issued right before the next one is yielded, and the last
    update is issued after the loop finishes.
    """
    if not hasattr(iterable, '__len__'):
        # Unknown length: nothing to measure against, just forward the items.
        yield from iterable
        return

    tracker = ProgressIndicator(len(iterable), desc, unit, leave, mininterval)
    is_first = True
    for element in iterable:
        if is_first:
            is_first = False  # the first item is yielded before any update
        else:
            tracker.update()
        yield element
    tracker.update()  # accounts for the last item (also runs for empty input)

# === ENUMS ===
class VpnUrlProtocol(Enum):
    """URL schemes handled by this script; each value is the lowercase scheme name."""
    RDP = "rdp"
    FTP = "ftp"
    HTTP = "http"
    HTTPS = "https"

class AddressType(Enum):
    """Labels for the kind of address a string represents.

    NOTE(review): not referenced in the visible part of the file; presumably
    used by the address-classification helpers further down — confirm.
    """
    IP = "ip"
    FQDN = "fqdn"
    HOSTNAME = "hostname"
    UNKNOWN = "unknown"

class AppType(Enum):
    """Values written to the ``AppType`` CSV column."""
    TCP_UDP = "TCP/UDP"
    HTTP_HTTPS = "HTTP/HTTPS"
    SAAS = "SaaS"

class AppLocation(Enum):
    """Values written to the ``AppLocation`` CSV column."""
    INSIDE = "Inside Corporate Network"
    OUTSIDE = "Outside Corporate Network"

class Protocol(Enum):
    """Protocol identifiers as they appear in generated rows.

    TCP and UDP carry the ``PROTOCOL_`` prefix used in the
    ``DestinationPortProtocol`` column; HTTP and HTTPS are used bare.
    """
    TCP = "PROTOCOL_TCP"
    UDP = "PROTOCOL_UDP"
    HTTPS = "HTTPS"
    HTTP = "HTTP"
    UNKNOWN = "UNKNOWN"

class LogType(Enum):
    """SSLVPN log record kinds; the values match the ``protocol`` group of the LOG_REGEX patterns."""
    TCPCONNSTAT = "TCPCONNSTAT"
    UDPFLOWSTAT = "UDPFLOWSTAT"

class IpClass(Enum):
    """Labels for an IP address class.

    NOTE(review): not referenced in the visible part of the file — confirm
    where the classification is performed.
    """
    UNKNOWN = "Unknown"
    CLASS_A = "A"
    CLASS_B = "B"
    CLASS_C = "C"

class ProtocolLiterals:
    """Plain-string protocol / service-type names compared against values parsed from ns.conf.

    SSL and SSL_BRIDGE are normalised to HTTPS by the vserver helpers; TCP_STR
    and UDP_STR are the unprefixed counterparts of ``Protocol.TCP`` / ``Protocol.UDP``.
    """
    SSL = "SSL"
    SSL_BRIDGE = "SSL_BRIDGE"
    TCP_STR = "TCP"
    UDP_STR = "UDP"
    HTTP= "HTTP"
    HTTPS= "HTTPS"

class FileConstants:
    """Default paths and file-name fragments."""
    NS_CONF_DEFAULT = "/nsconfig/ns.conf"  # default for the nsconf_path CLI argument
    LOG_DIR_DEFAULT = "/var/log/"  # default for the log_dir CLI argument
    LOG_PATTERN = "ns.log"  # copied into the module-level LOG_PATTERN
    OUTPUT_CSV_PREFIX = "applications"  # output CSV is "<prefix>_<timestamp>.csv"

class RegexPatterns:
    """Regular-expression sources for classifying URLs and addresses."""
    # rdp/ftp/http/https scheme followed by "://" and at least one non-space
    # character (same literal as used inside get_vpn_url_proto_match).
    VPN_URL_PROTO = r'(rdp|ftp|http|https)://[^\s]+'
    # Dotted name of at most 253 characters whose last label is 2-63 letters.
    FQDN = r"^(?=.{1,253}$)(?:[a-zA-Z0-9-]{1,63}\.)+[a-zA-Z]{2,63}$"
    # Single label of 1-63 letters/digits/hyphens that neither starts nor ends with "-".
    HOSTNAME = r"^(?!-)[A-Za-z0-9-]{1,63}(?<!-)$"
    # Dotted quad (octet ranges are not checked) or a run of hex digits and
    # colons, optionally wrapped in square brackets.
    IP_PATTERN = r'^(\d{1,3}\.){3}\d{1,3}$|^\[?[a-fA-F0-9:]+\]?$'

class TimeConstants:
    """Time-related tuning values."""
    # NOTE(review): the unit is not visible in this part of the file
    # (timedelta is imported at the top) — confirm before changing.
    VISIT_TIME_INTERVAL = 2

class URLConstants:
    """URL scheme prefixes, each including the trailing ``://``."""
    HTTPS_SCHEME = "https://"
    HTTP_SCHEME = "http://"
    FTP_SCHEME = "ftp://"
    RDP_SCHEME = "rdp://"

    # List of supported schemes for easy iteration (checked in this order
    # when a prefix is stripped from a URL).
    SUPPORTED_SCHEMES = [HTTP_SCHEME, HTTPS_SCHEME, FTP_SCHEME, RDP_SCHEME]

class NSConfPatterns:
    """Regular-expression sources for the ns.conf commands this script understands."""
    # Groups: name, protocol (TCP|ANY|UDP), destination, optional -netmask, optional -destPort.
    INTRANET_APP = r'add vpn intranetApplication\s+"?([^"\s]+)"?\s+(TCP|ANY|UDP)\s+"?([^"\s]+)"?(?:.*?-netmask\s+([^\s]+))?(?:.*?-destPort\s+([^\s]+))?'
    DNS_SUFFIX = r'add dns suffix\s+([^\s]+)'
    # Groups: name, second argument (ignored by the parser), URL (must be double-quoted).
    VPN_URL = r'add vpn url "?([^"]+)"? "?([^"]+)"? "([^"]+)"'
    LB_VSERVER_DETAIL = r'^add\s+lb\s+vserver\s+"?(?P<name>[^\"]+?)"?\s+(?P<protocol>HTTP|TCP|UDP|SSL|SSL_BRIDGE)\s+(?P<ip>\S+)\s+(?P<port>\S+)'
    # Shape: add cs vserver <name> <serviceType> <ip> <port>.
    # The fields are whitespace-separated; the previous pattern had no
    # separator between <name> and <serviceType> nor around <ip>, so it could
    # not match a regular command line.  Names are matched with \S+ (same as
    # CS_VSERVER_BIND), so quoted names containing spaces are not supported.
    CS_VSERVER = r"add cs vserver (?P<csvserver>\S+)\s+(?P<protocol>HTTP|TCP|UDP|SSL)\s+\S+\s+(?P<port>\d+)"
    CS_POLICY = r"add cs policy (?P<cspolicy>\S+)"
    CS_VSERVER_BIND = r"bind cs vserver (?P<csvserver>\S+) -policyName (?P<cspolicy>\S+)"
    # Hostname literal inside HTTP.REQ.HOSTNAME...EQ / eq / EQUALS_ANY(\"...\").
    CS_POLICY_EQ = r'HTTP\.REQ\.HOSTNAME\.\S*?(EQ|eq|EQUALS_ANY)\(\\"(?P<rule>.*?)\\"'
    # Hostname literal inside HTTP.REQ.HOSTNAME...CONTAINS / CONTAINS_ANY(\"...\").
    CS_POLICY_CONTAINS = r'HTTP\.REQ\.HOSTNAME\.\S*?(CONTAINS|CONTAINS_ANY)\(\\"(?P<rule>.*?)\\"'
    VPN_BIND = r'-intranetApplication\s+([^\s]+)'
    REDIRECT_URL = r'-redirectURL\s+(\S+)'
    HTTPS_REDIRECT_URL = r'-httpsRedirectUrl\s+(\S+)'

# === CSV COLUMN CONSTANTS ===
class CSVColumns:
    """Header names of the output CSV; also used as the keys of every generated row dict."""
    APP_NAME = "AppName"
    APP_LOCATION = "AppLocation"
    APP_TYPE = "AppType"
    URL = "URL"
    RELATED_DOMAINS = "RelatedDomains"
    DESTINATION_PORT_PROTOCOL = "DestinationPortProtocol"
    ROUTING_TYPE = "RoutingType"
    RESOURCE_LOCATION = "ResourceLocation"
    DESCRIPTION = "Description"
    CATEGORY = "Category"
    UNIQUE_USERS = "UniqueUsers"
    TOTAL_VISITS = "TotalVisits"
    UNIQUE_USERS_PER_APP = "UniqueUsersperApp"

# Command-line interface.  NOTE: arguments are parsed at import time, so
# importing this module consumes sys.argv.
parser = argparse.ArgumentParser(description="ZTNA Migration Script")
parser.add_argument("nsconf_path", nargs="?", default=FileConstants.NS_CONF_DEFAULT, help="Path to ns.conf")
parser.add_argument("log_dir", nargs="?", default=FileConstants.LOG_DIR_DEFAULT, help="Directory containing logs")
parser.add_argument("--debug", action="store_true", help="Enable debug logging")
parser.add_argument("--force", action="store_true", help="Force execution even when high CPU usage is detected")
args = parser.parse_args()

# Module-level settings derived from the parsed arguments.
NSCONF_PATH = args.nsconf_path
LOG_DIR = args.log_dir
LOG_PATTERN = FileConstants.LOG_PATTERN
DEBUG_LOGS = args.debug
FORCE_EXECUTION = args.force

# Timestamp (local time, YYYYMMDD_HHMMSS) appended to the output file names
# so that successive runs do not overwrite each other.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
OUTPUT_CSV = f"{FileConstants.OUTPUT_CSV_PREFIX}_{timestamp}.csv"
# Column order of the output CSV.
CSV_COLUMNS = [
    CSVColumns.APP_NAME, CSVColumns.APP_LOCATION, CSVColumns.APP_TYPE, 
    CSVColumns.URL, CSVColumns.RELATED_DOMAINS, CSVColumns.DESTINATION_PORT_PROTOCOL, 
    CSVColumns.ROUTING_TYPE, CSVColumns.RESOURCE_LOCATION, CSVColumns.DESCRIPTION, 
    CSVColumns.CATEGORY, CSVColumns.UNIQUE_USERS, CSVColumns.TOTAL_VISITS, 
    CSVColumns.UNIQUE_USERS_PER_APP
]

# --- NSLOG PARSER & GROUPER ---
# Each LOG_REGEX pattern is an alternation of two line shapes:
#   1. "SSLVPN TCPCONNSTAT|UDPFLOWSTAT" lines -> groups syslog_prefix_tcp_udp,
#      timestamp_tcp_udp, protocol, user_tcp, destination_ip, destination_port
#      (destination_ip only matches digits and dots, i.e. IPv4-looking text).
#   2. "SSLVPN HTTPREQUEST" lines -> groups syslog_prefix_http, timestamp_http,
#      http_res (the token immediately before "User"), user_http.
# The three patterns differ only in the layout of the embedded timestamp.

# Variant 1: timestamp "NN/NN/NNNN:HH:MM:SS GMT".
LOG_REGEX1 = re.compile(
    r"^(?P<syslog_prefix_tcp_udp>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_tcp_udp>\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2} GMT).*? "
    r"SSLVPN (?P<protocol>TCPCONNSTAT|UDPFLOWSTAT) .*? "
    r"User (?P<user_tcp>[^\s]+) .*? " 
    r"Destination (?P<destination_ip>[\d.]+):(?P<destination_port>\d+)|"
    r"^(?P<syslog_prefix_http>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_http>\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2} GMT).*? "
    r"SSLVPN HTTPREQUEST .*? (?P<http_res>[^\s]+) User (?P<user_http>[^\s]+)"
)
# Variant 2: timestamp "NN/NN/NNNN:HH:MM:SS " without a "GMT" marker; note that
# the captured timestamp group includes the trailing space.
LOG_REGEX2 = re.compile(
    r"^(?P<syslog_prefix_tcp_udp>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_tcp_udp>\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2} ) .*? "
    r"SSLVPN (?P<protocol>TCPCONNSTAT|UDPFLOWSTAT) .*? "
    r"User (?P<user_tcp>[^\s]+) .*? " 
    r"Destination (?P<destination_ip>[\d.]+):(?P<destination_port>\d+)|"
    r"^(?P<syslog_prefix_http>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_http>\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2} ) .*? "
    r"SSLVPN HTTPREQUEST .*? (?P<http_res>[^\s]+) User (?P<user_http>[^\s]+)"
)

# Variant 3: four-digit field first, "NNNN/NN/NN:HH:MM:SS GMT".
LOG_REGEX3 = re.compile(
    r"^(?P<syslog_prefix_tcp_udp>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_tcp_udp>\d{4}/\d{2}/\d{2}:\d{2}:\d{2}:\d{2} GMT) .*? "
    r"SSLVPN (?P<protocol>TCPCONNSTAT|UDPFLOWSTAT) .*? "
    r"User (?P<user_tcp>[^\s]+) .*? " 
    r"Destination (?P<destination_ip>[\d.]+):(?P<destination_port>\d+)|"
    r"^(?P<syslog_prefix_http>[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+<[^>]+>\s+[^\s]+\s+(?P<timestamp_http>\d{4}/\d{2}/\d{2}:\d{2}:\d{2}:\d{2} GMT) .*? "
    r"SSLVPN HTTPREQUEST .*? (?P<http_res>[^\s]+) User (?P<user_http>[^\s]+)"
)

class vpn_url_proto(Enum):
    """URL schemes looked up by ``get_vpn_url_proto_match``.

    NOTE(review): has the same members and values as ``VpnUrlProtocol``
    defined earlier in this file; the two look like candidates for merging.
    """
    RDP = "rdp"
    FTP = "ftp"
    HTTP = "http"
    HTTPS = "https"

# Lowercase scheme text -> vpn_url_proto member.
vpn_url_proto_map = {
    'rdp': vpn_url_proto.RDP,
    'ftp': vpn_url_proto.FTP,
    'http': vpn_url_proto.HTTP,
    'https': vpn_url_proto.HTTPS,
}

def get_vpn_url_proto_match(url):
    """Return the scheme of the first rdp/ftp/http/https URL found in ``url``.

    The text is searched (not anchored), so a URL embedded in a longer string
    is found as well.  URI schemes are case-insensitive, so ``HTTPS://host``
    is recognised too; the result is always lowercase and therefore equal to
    the ``value`` of the corresponding ``vpn_url_proto`` member.

    Returns:
        ``"rdp"``, ``"ftp"``, ``"http"`` or ``"https"``; ``None`` when
        ``url`` is empty/``None`` or contains no supported URL.
    """
    if not url:
        return None
    match = re.search(r'(rdp|ftp|http|https)://[^\s]+', url, re.IGNORECASE)
    return match.group(1).lower() if match else None

# All log-line patterns defined above, collected in one list.
LOG_REGEXES = [LOG_REGEX1, LOG_REGEX2, LOG_REGEX3]
# Usage counters/sets.  NOTE(review): their key layout is not visible in this
# part of the file — confirm against the log-processing code.
app_unique_users = defaultdict(set)
unique_users = defaultdict(set)
# CS vserver name -> (protocol, port); filled from "add cs vserver" lines.
CSVservers = {}
# CS policy name -> (list of hostname EQ rules, set of hostname CONTAINS rules).
CSPolicies = {}
# CS vserver name -> list of bound policy names (only known vservers/policies).
CSVsPolicies = {}
total_visits = defaultdict(int)
app_total_visits = defaultdict(int)
app_user_last_visit = {}
# Global dictionaries for tracking app names and their destinations
app_name_counts = defaultdict(int)  # Track how many times each app name has been used
app_name_mappings = {}  # Map original app names to their final names with suffixes
app_destinations = defaultdict(set)  # Track unique destinations per base app name
app_destination_count = defaultdict(int)  # Track number of destinations per app name (reverse mapping)
# (url, protocol, port) -> application name; filled while LB vserver rows are finalised.
lb_vservers = {}
# --- NSCONF PARSER ---
class NSConfApp:
    """One intranet application destination read from ns.conf.

    Attributes mirror the constructor arguments; ``is_any_proto`` starts at 0
    and is set to 1 by the parser for applications declared with protocol ANY.
    """

    def __init__(self, name, dest, netmask, proto, port, dns_suffix=None, is_active=False, app_type=AppType.TCP_UDP.value):
        # Identity and destination of the application.
        self.name, self.dest, self.netmask = name, dest, netmask
        self.proto, self.port = proto, port
        # Flag is flipped by the parser, never supplied by the caller.
        self.is_any_proto = 0
        # Optional metadata.
        self.dns_suffix, self.is_active, self.app_type = dns_suffix, is_active, app_type

    def __repr__(self):
        summary = f"name={self.name}, dest={self.dest}, proto={self.proto}, port={self.port}"
        return f"NSConfApp({summary})"
        
# --- LOGGER SETUP ---
# The log file shares the run timestamp with the output CSV.
LOG_FILE = f"applications_{timestamp}.log"
# --debug on the command line switches the level from INFO to DEBUG.
LOG_LEVEL = logging.DEBUG if DEBUG_LOGS else logging.INFO

# Simple logging configuration: root logger writes to LOG_FILE only.
logging.basicConfig(
    filename=LOG_FILE,
    level=LOG_LEVEL,
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)

class NSConfParser:
    def __init__(self, conf_path):
        """Create an empty parser for the ns.conf file located at ``conf_path``."""
        self.conf_path = conf_path

        # Entities collected while reading the configuration.
        self.intranet_apps, self.vpn_urls, self.lb_cs_vservers = [], [], []
        self.dns_suffixes = set()

        # Application names seen in bind lines, and the subset that matched
        # a parsed intranet application.
        self.temp_active_apps, self.active_intranet_apps = set(), set()
        # Pre-computed set of active application names for fast membership tests.
        self._active_apps_set = set()

        # Destination lookup tables:
        #   subnet_map: (subnet, proto, port) -> (app_name, netmask)
        #   range_map / ip_map / fqdn_map: (dest, proto, port) -> app_name
        self.subnet_map, self.range_map = {}, {}
        self.ip_map, self.fqdn_map = {}, {}

        # Simple caches for performance optimisation.
        self._ip_match_cache, self._fqdn_match_cache, self._lb_vserver_cache = {}, {}, {}

    def get_redirect_url(self, line: str, flag: str):
        """Return the whitespace-delimited token that follows ``flag`` in ``line``.

        The token is returned exactly as written (surrounding quotes are kept).
        ``None`` is returned when the flag is absent or any error occurs; errors
        are logged rather than raised.
        """
        try:
            found = re.search(re.escape(flag) + r"\s+(\S+)", line)
            if found is None:
                return None
            target = found.group(1)
            logger.debug(f"Redirect URL found for flag {flag}: {target}")
            return target
        except Exception as ex:
            logger.error(f"Error extracting redirect URL from line: {line} - {ex}")
        return None

    def get_port_from_url(self, url: str):
        """Return the explicit port of ``url`` as a string.

        ``None`` is returned when the URL carries no port (or port 0) and when
        the URL cannot be parsed, in which case the error is logged.
        """
        try:
            port_number = urlparse(url).port
        except Exception as ex:
            logger.error(f"Error extracting port from URL: {url} - {ex}")
            return None
        return str(port_number) if port_number else None

    def _try_urlparse_extraction(self, url: str):
        """Return the hostname ``urlparse`` sees in ``url``, or ``None`` on any failure.

        URLs that do not start with one of the supported schemes get
        ``http://`` prepended first, because ``urlparse`` only recognises a
        network location after a scheme.
        """
        try:
            has_scheme = url.startswith(tuple(URLConstants.SUPPORTED_SCHEMES))
            candidate = url if has_scheme else URLConstants.HTTP_SCHEME + url
            return urlparse(candidate).hostname
        except Exception:
            return None

    def _extract_clean_hostname(self, url: str):
        """Manually reduce ``url`` to a bare hostname.

        Steps: strip the first matching supported scheme prefix, cut at the
        first ``/``, ``:``, ``?`` and ``#`` (in that order), then delete any
        ``@`` and space characters.  Returns the result when it is at least two
        characters long, otherwise ``None``.
        """
        remainder = url

        # Drop the scheme prefix (only the first one that matches).
        scheme = next(
            (s for s in URLConstants.SUPPORTED_SCHEMES if remainder.startswith(s)),
            None,
        )
        if scheme is not None:
            remainder = remainder[len(scheme):]

        # Keep only the text before path, port, query and fragment markers.
        for delimiter in ('/', ':', '?', '#'):
            remainder = remainder.split(delimiter)[0]

        # Remove characters that cannot be part of a hostname.
        for unwanted in ('@', ' '):
            remainder = remainder.replace(unwanted, '')

        hostname = remainder.strip()
        return hostname if len(hostname) >= 2 else None

    def _extract_domain_suffix(self, host: str):
        """Build the wildcard entry for ``host``.

        * IP-looking hosts (dotted quad, or hex digits/colons optionally in
          brackets) are returned whole: ``10.1.2.3`` -> ``*.10.1.2.3``.
        * Dotted names are reduced to their last two labels:
          ``app.example.com`` -> ``*.example.com``.  Note that this also turns
          ``gids.sun.ac.za`` into ``*.ac.za``.
        * Single-label names are returned whole: ``intranet`` -> ``*.intranet``.
        """
        looks_like_ip = re.match(r'^(\d{1,3}\.){3}\d{1,3}$|^\[?[a-fA-F0-9:]+\]?$', host)
        if looks_like_ip:
            return f"*.{host}"

        # Slicing keeps a single label as-is and trims longer names to two labels.
        trailing_labels = host.split('.')[-2:]
        return "*." + ".".join(trailing_labels)

    def get_related_domains(self, url: str):
        """Return the wildcard related-domain entry (``*.suffix`` / ``*.IP``) for ``url``.

        Surrounding quotes are stripped, the hostname is taken from
        ``urlparse`` and, if that yields nothing, from the manual clean-up
        helper.  ``None`` is returned when no hostname can be found or an
        error occurs (both cases are logged).
        """
        try:
            url = url.strip('\'"')

            host = self._try_urlparse_extraction(url)
            if not host:
                # Malformed URL: fall back to manual extraction and cleaning.
                logger.debug(f"urlparse failed for {url}, trying manual extraction")
                host = self._extract_clean_hostname(url)

            if host:
                return self._extract_domain_suffix(host)

            logger.warning(f"No hostname found in URL to extract relatedDomains: {url}")
            return None

        except Exception as ex:
            logger.error(f"Error extracting related domains from URL: {url} - {ex}")
            return None

    def _process_lb_vserver_protocol(self, proto, port):
        """Map an LB vserver service type to the application type used in the output.

        SSL / SSL_BRIDGE / HTTP on a port other than "443" or "80" become TCP;
        otherwise SSL and SSL_BRIDGE become HTTPS and every other service type
        is returned unchanged.
        """
        ssl_kinds = (ProtocolLiterals.SSL, ProtocolLiterals.SSL_BRIDGE)
        web_kinds = ssl_kinds + (ProtocolLiterals.HTTP,)

        # Web service types on non-web ports are treated as plain TCP.
        if proto in web_kinds and port not in ("443", "80"):
            return ProtocolLiterals.TCP_STR

        if proto in ssl_kinds:
            return ProtocolLiterals.HTTPS
        return proto

    def _process_lb_vserver_url(self, url, type_, line):
        """Resolve the effective URL, type and port of an LB vserver.

        Args:
            url: Address field of the ``add lb vserver`` command.
            type_: Application type already derived from the service type.
            line: The complete configuration line (searched for redirect flags).

        Returns:
            ``(url, type_, port)``.  ``port`` is only set when a
            ``-redirectURL`` / ``-httpsRedirectUrl`` is present (taken from the
            redirect URL, defaulting to "443"/"80"); otherwise it is ``None``
            and the caller keeps the port from the command line.
            ``(None, None, None)`` means the vserver must be skipped: it is
            bound to 0.0.0.0 and has no redirect.
        """
        # Look the redirect up first: it decides both whether a 0.0.0.0
        # vserver is usable and what the final URL is.  (The flags are matched
        # via get_redirect_url; testing the NSConfPatterns regex sources with
        # ``in line`` can never succeed because they are not literal text.)
        redirect_url = (self.get_redirect_url(line, "-redirectURL") or
                        self.get_redirect_url(line, "-httpsRedirectUrl"))

        if url == "0.0.0.0" and not redirect_url:
            logger.info(f"Skipping LB vserver with 0.0.0.0 and no redirect: {line}")
            return None, None, None

        # Must be bound on every path; it is only known for redirects.
        port = None

        if redirect_url:
            # The redirect target replaces the vserver address entirely.
            url = redirect_url.strip('\'"')
            type_ = (Protocol.HTTPS.value if url.startswith(URLConstants.HTTPS_SCHEME)
                     else Protocol.HTTP.value)
            port = self.get_port_from_url(url) or ("443" if type_ == Protocol.HTTPS.value else "80")
        elif type_ in (Protocol.HTTP.value, Protocol.HTTPS.value):
            # Add protocol prefix for HTTP/HTTPS
            url = f"{type_.lower()}://{url}"

        return url.strip('\'"'), type_, port

    def _create_lb_vserver_row(self, name, type_, url, port, related_urls):
        """Build the CSV row dict for one LB vserver.

        TCP/UDP applications carry their target in ``DestinationPortProtocol``
        (``url:port:PROTOCOL_x``) and leave ``URL`` / ``RelatedDomains`` empty;
        HTTP/HTTPS applications do the opposite.  Usage counters start at 0.
        """
        if type_ in (Protocol.HTTP.value, Protocol.HTTPS.value):
            protocol = type_
        else:
            protocol = f"PROTOCOL_{type_}"

        if type_ in (ProtocolLiterals.TCP_STR, ProtocolLiterals.UDP_STR):
            app_type = AppType.TCP_UDP.value
            shown_url, shown_domains = "", ""
            destination = f"{url}:{port}:{protocol}"
        else:
            app_type = AppType.HTTP_HTTPS.value
            shown_url, shown_domains = url, related_urls
            destination = ""

        row = {
            CSVColumns.APP_NAME: name,
            CSVColumns.APP_LOCATION: AppLocation.INSIDE.value,
            CSVColumns.APP_TYPE: app_type,
            CSVColumns.URL: shown_url,
            CSVColumns.RELATED_DOMAINS: shown_domains,
            CSVColumns.DESTINATION_PORT_PROTOCOL: destination,
        }
        # Columns this step leaves blank.
        for blank_column in (CSVColumns.ROUTING_TYPE, CSVColumns.RESOURCE_LOCATION,
                             CSVColumns.DESCRIPTION, CSVColumns.CATEGORY):
            row[blank_column] = ""
        # Usage statistics start at zero.
        for counter_column in (CSVColumns.UNIQUE_USERS, CSVColumns.TOTAL_VISITS,
                               CSVColumns.UNIQUE_USERS_PER_APP):
            row[counter_column] = 0
        return row

    def _finalize_lb_vserver_row(self, row, name, type_, url, port, protocol):
        """Register the vserver in ``lb_vservers`` and set the row's location.

        Private (RFC 1918) addresses, FQDNs and hostnames stay "inside" under
        their own name.  Anything else is marked "outside"; outside TCP/UDP
        vservers are additionally filed under ``uncategorized_apps``.
        Returns the (possibly modified) ``row``.
        """
        key = (url, protocol, port)

        if is_private_rfc1918(url) or is_valid_fqdn(url) or is_valid_hostname(url):
            lb_vservers[key] = name
            return row

        row[CSVColumns.APP_LOCATION] = AppLocation.OUTSIDE.value

        if type_ not in (ProtocolLiterals.TCP_STR, ProtocolLiterals.UDP_STR):
            lb_vservers[key] = name
            return row

        lb_vservers[key] = "uncategorized_apps"
        row[CSVColumns.APP_NAME] = "uncategorized_apps"
        logger.info(f"Adding under uncategorized apps: {name}")
        return row

    def extract_lb_vserver(self, line: str, pattern: str):
        """Turn an ``add lb vserver`` line into a CSV row dict.

        ``pattern`` is a cheap pre-filter; the detailed pattern then extracts
        name, service type, address and port.  Returns ``None`` when the line
        does not match, when the vserver is skipped, or when an error occurs
        (errors are logged, never raised).
        """
        try:
            if re.search(pattern, line, re.IGNORECASE) is None:
                return None

            details = re.search(NSConfPatterns.LB_VSERVER_DETAIL, line, re.IGNORECASE)
            if details is None:
                logger.warning(f"Detailed pattern not matching for LB Vserver command, skipping: {line}")
                return None

            name, proto, url, port = details.group("name", "protocol", "ip", "port")
            if port == "*":
                port = "1-65535"  # wildcard port covers the whole range

            # Derive the application type from service type and port.
            type_ = self._process_lb_vserver_protocol(proto, port)

            # Resolve redirects; a None URL means "skip this vserver".
            new_url, new_type, new_port = self._process_lb_vserver_url(url, type_, line)
            if new_url is None:
                return None
            url = new_url
            type_ = new_type or type_
            port = new_port or port

            # Related domains only make sense for HTTP/HTTPS applications.
            web_schemes = (VpnUrlProtocol.HTTP.value, VpnUrlProtocol.HTTPS.value)
            related_urls = self.get_related_domains(url) if type_.lower() in web_schemes else ""

            if type_ in (Protocol.HTTP.value, Protocol.HTTPS.value):
                protocol = type_
            else:
                protocol = f"PROTOCOL_{type_}"

            row = self._create_lb_vserver_row(name, type_, url, port, related_urls)
            return self._finalize_lb_vserver_row(row, name, type_, url, port, protocol)

        except (TypeError, AttributeError) as ex:
            logger.error(f"{type(ex).__name__} occurred for command: {line} - {ex}")
        except re.error as ex:
            logger.error(f"Regex error occurred for command: {line} - {ex}")
        except Exception as ex:
            logger.error(f"Error extracting application(s) from LB Vserver: {line} - {ex}")
        return None

    def _process_cs_vserver_match(self, line):
        """Record an ``add cs vserver`` line in ``CSVservers`` as name -> (protocol, port).

        The SSL service type is stored as HTTPS.  Lines that do not match the
        CS vserver pattern are ignored.
        """
        found = re.search(NSConfPatterns.CS_VSERVER, line, re.IGNORECASE)
        if not found:
            return

        protocol = found.group("protocol")
        if protocol == ProtocolLiterals.SSL:
            protocol = ProtocolLiterals.HTTPS

        CSVservers[found.group("csvserver")] = (protocol, found.group("port"))

    def _process_cs_policy_match(self, line):
        """Record an ``add cs policy`` line in ``CSPolicies``.

        The stored value is ``(eq_rules, contains_rules)``: an ordered list of
        hostname literals from EQ-style expressions and a set of literals from
        CONTAINS-style expressions.  Non-matching lines are ignored.
        """
        found = re.search(NSConfPatterns.CS_POLICY, line, re.IGNORECASE)
        if not found:
            return

        eq_rules = []
        for hit in re.finditer(NSConfPatterns.CS_POLICY_EQ, line):
            eq_rules.append(hit.group("rule"))

        contains_rules = set()
        for hit in re.finditer(NSConfPatterns.CS_POLICY_CONTAINS, line):
            contains_rules.add(hit.group("rule"))

        CSPolicies[found.group("cspolicy")] = (eq_rules, contains_rules)

    def _process_cs_bind_match(self, line):
        """Record a ``bind cs vserver ... -policyName ...`` line in ``CSVsPolicies``.

        The policy is appended to the vserver's list only when both the policy
        and the vserver have already been seen.
        """
        found = re.search(NSConfPatterns.CS_VSERVER_BIND, line, re.IGNORECASE)
        if not found:
            return

        csvserver, cspolicy = found.group("csvserver"), found.group("cspolicy")
        if cspolicy not in CSPolicies or csvserver not in CSVservers:
            return

        CSVsPolicies.setdefault(csvserver, []).append(cspolicy)

    def extract_cs_vserver(self, line):
        """Feed a CS-related ns.conf line to the vserver, policy and bind handlers.

        Each handler ignores lines that are not of its kind.  Any exception is
        logged and stops processing of this line.
        """
        handlers = (
            self._process_cs_vserver_match,
            self._process_cs_policy_match,
            self._process_cs_bind_match,
        )
        try:
            for handle in handlers:
                handle(line)
        except Exception as ex:
            logger.error(f"Error extracting information from CS Vserver: {line} - {ex}")

    def _process_destination_mappings(self, dest, name, p, port, netmask):
        """File an intranet application destination in the matching lookup table.

        Precedence: netmask present -> ``subnet_map``; dash-separated IPs ->
        ``range_map``; comma-separated list -> handled per element; single IP
        -> ``ip_map``; everything else -> ``fqdn_map`` (with a warning when the
        text is neither a valid FQDN nor a valid hostname).
        """
        key = (dest, p, port)

        if netmask:
            try:
                subnet = str(ipaddress.ip_network(f"{dest}/{netmask}", strict=False))
                self.subnet_map[(subnet, p, port)] = (name, netmask)
            except Exception as ex:
                logger.error(f"Error calculating subnet for {dest}/{netmask}: {ex}")
            return

        if '-' in dest and all(is_valid_ip(ip.strip()) for ip in dest.split('-')):
            logger.debug(f"IP range detected: {dest}")
            self.range_map[key] = name
            return

        if ',' in dest:
            self._process_multiple_destinations(dest, name, p, port, netmask)
            return

        if is_valid_ip(dest):
            self.ip_map[key] = name
            return

        # Names - recognised or not - all end up in the FQDN table.
        if not (is_valid_fqdn(dest) or is_valid_hostname(dest)):
            logger.warning(f"Unknown destination format: {dest}")
        self.fqdn_map[key] = name

    def _process_multiple_destinations(self, dest, name, p, port, netmask):
        """Split a comma-separated destination list and register every element.

        Each element is mapped (IP -> ``ip_map``, FQDN/hostname -> ``fqdn_map``,
        anything else unmapped) and always gets its own ``NSConfApp`` entry in
        ``intranet_apps``.
        """
        for target in map(str.strip, dest.split(',')):
            key = (target, p, port)
            if is_valid_ip(target):
                self.ip_map[key] = name
            elif is_valid_fqdn(target) or is_valid_hostname(target):
                self.fqdn_map[key] = name
            logger.debug(f"Adding intranet app for multiple dest: {target}")
            self.intranet_apps.append(NSConfApp(name, target, netmask, p, port))

    def _process_intranet_app_line(self, line):
        """Parse an ``add vpn intranetApplication`` line.

        Protocol ANY is expanded into one TCP and one UDP entry.  For a single
        destination an ``NSConfApp`` is appended here; comma-separated lists
        are appended element-wise by the destination-mapping step.
        """
        logger.debug(f"Matched 'add vpn intranetApplication' line: {line}")
        parsed = re.match(NSConfPatterns.INTRANET_APP, line)
        if parsed is None:
            return

        logger.debug(f"Regex match for intranetApplication: {parsed.groups()}")
        name, proto, dest, netmask, port = parsed.groups()
        logger.debug(f"Destination parsed: {dest}")

        if port == "*":
            port = "1-65535"  # wildcard port covers the whole range
        proto = proto.upper()
        is_any = proto == "ANY"
        if is_any:
            protos = [Protocol.TCP.value, Protocol.UDP.value]
        else:
            protos = [f"PROTOCOL_{proto}"]
        single_destination = ',' not in dest

        for p in protos:
            app = NSConfApp(name, dest, netmask, p, port)
            if is_any:
                app.is_any_proto = 1
                logger.debug(f"App {name} is ANY protocol, updating name suffix for {p}")

            if single_destination:
                logger.debug(f"Adding intranet app: {app}")
                self.intranet_apps.append(app)

            self._process_destination_mappings(dest, name, p, port, netmask)

    def _process_dns_suffix_line(self, line):
        """Add the suffix named in an ``add dns suffix`` line to ``dns_suffixes``."""
        logger.debug(f"Matched 'add dns suffix' line: {line}")
        found = re.search(NSConfPatterns.DNS_SUFFIX, line)
        if found is None:
            return
        self.dns_suffixes.add(found.group(1))

    def _process_vpn_url_line(self, line):
        """Append ``(name, url)`` from an ``add vpn url`` line to ``vpn_urls``.

        The command's second argument (between name and URL) is not kept.
        """
        logger.debug(f"Matched 'add vpn url' line: {line}")
        found = re.match(NSConfPatterns.VPN_URL, line)
        if found is None:
            return
        entry = (found.group(1), found.group(3))
        self.vpn_urls.append(entry)

    def _process_lb_vserver_line(self, line):
        """Extract an LB vserver row from ``line`` and keep it when extraction succeeds."""
        row = self.extract_lb_vserver(line, "add lb vserver")
        if not row:
            logger.warning(f"LB vserver extraction returned None for line: {line}")
            return
        self.lb_cs_vservers.append(row)
        logger.info(f"LB vserver extracted and appended: {row.get(CSVColumns.APP_NAME, 'unknown')}")

    def _process_cs_vserver_line(self, line):
        """Handle an ``add cs vserver`` / ``add cs policy`` / ``bind cs vserver`` line.

        Thin dispatcher used by ``parse``: the line is forwarded unchanged to
        ``extract_cs_vserver``, which logs (and does not raise) any error.
        """
        self.extract_cs_vserver(line)

    def _process_bind_line(self, line):
        """Remember the application named after ``-intranetApplication`` in a bind line."""
        found = re.search(NSConfPatterns.VPN_BIND, line)
        if found is None:
            logger.warning(f"No intranetApplication found in line: {line}")
            return
        self.temp_active_apps.add(found.group(1))

    def _finalize_active_apps(self):
        """Mark apps as active and build lookup sets."""
        logger.info("Processing intranet apps for active status...")
        for app in self.intranet_apps:
            if app.name in self.temp_active_apps:
                app.is_active = True
                logger.info(f"App {app.name} marked as active.")
                self.active_intranet_apps.add(app.name)
            else:
                logger.debug(f"App {app.name} is not active.")
        
        # Build active apps set for O(1) lookup performance
        self._active_apps_set = {app.name for app in self.intranet_apps if app.is_active}
        logger.info(f"No. of active applications from conf : {len(self._active_apps_set)}")

    def parse(self):
        """Read the ns.conf file and dispatch every relevant line to its handler.

        Blank lines are dropped and the remaining lines are stripped.  After
        all lines are processed the active-application flags are finalised.

        Returns:
            The list of LB/CS vserver rows collected so far.

        Raises:
            Whatever ``open``/reading raises; the error is logged first.
        """
        logger.info(f"Opening ns.conf file: {self.conf_path}")
        try:
            with open(self.conf_path, "r", encoding="utf-8", errors="ignore") as f:
                lines = []
                for raw_line in f:
                    stripped = raw_line.strip()
                    if stripped:
                        lines.append(stripped)
        except Exception as e:
            logger.error(f"Failed to open ns.conf file: {self.conf_path} - {e}")
            raise

        logger.info(f"Parsing {len(lines)} lines from ns.conf...")

        # Checked in order; the first matching prefix wins.
        prefix_handlers = (
            ("add vpn intranetApplication", self._process_intranet_app_line),
            ("add dns suffix", self._process_dns_suffix_line),
            ("add vpn url", self._process_vpn_url_line),
            ("add lb vserver", self._process_lb_vserver_line),
            (("add cs vserver", "add cs policy", "bind cs vserver"), self._process_cs_vserver_line),
        )

        for line in progress_bar(lines, desc="Parsing ns.conf configuration", unit="lines", leave=False):
            for prefix, handler in prefix_handlers:
                if line.startswith(prefix):
                    handler(line)
                    break
            else:
                # Not one of the commands above: maybe an application binding.
                if line.startswith("bind") and "-intranetApplication" in line:
                    self._process_bind_line(line)

        self._finalize_active_apps()
        return self.lb_cs_vservers

    def _build_related_domains(self, app):
        """Return ``"*.host1, *.host2, ..."`` for a CS vserver.

        The hostnames are the EQ rules of the first policy bound to ``app``;
        an empty string is returned when that policy has none.
        """
        first_policy = CSVsPolicies[app][0]
        eq_hostnames = CSPolicies[first_policy][0]
        return ", ".join(f"*.{d}" for d in eq_hostnames)

    def _determine_protocol_and_location(self, protocol, port, url):
        """Return ``(protocol, url, app_location)`` for a CS application.

        SSL/HTTP on a port other than "443"/"80" is downgraded to TCP.
        HTTPS/HTTP applications get the scheme prepended to ``url`` and are
        always "inside"; other protocols are "inside" only when ``url`` is a
        private (RFC 1918) address, an FQDN or a hostname.
        """
        # Adjust protocol for non-standard ports.
        non_web_port = port not in ("443", "80")
        if protocol in (ProtocolLiterals.SSL, ProtocolLiterals.HTTP) and non_web_port:
            protocol = ProtocolLiterals.TCP_STR

        # Web applications: prefix the scheme, location is always inside.
        web_schemes = (
            (ProtocolLiterals.HTTPS, URLConstants.HTTPS_SCHEME),
            (ProtocolLiterals.HTTP, URLConstants.HTTP_SCHEME),
        )
        for literal, scheme in web_schemes:
            if protocol == literal:
                return protocol, scheme + url, AppLocation.INSIDE.value

        # Everything else: decide by the kind of address.
        if is_private_rfc1918(url) or is_valid_fqdn(url) or is_valid_hostname(url):
            return protocol, url, AppLocation.INSIDE.value
        return protocol, url, AppLocation.OUTSIDE.value

    def _create_csv_row(self, app, protocol, port, url, app_location, related_domains):
        """Build the CSV row dict for one CS application.

        TCP/UDP applications fill ``DestinationPortProtocol``
        (``url:port:PROTOCOL_x``) and leave ``URL`` / ``RelatedDomains`` empty;
        all others do the opposite.  Usage counters start at 0.
        """
        if protocol in (ProtocolLiterals.TCP_STR, ProtocolLiterals.UDP_STR):
            app_type = AppType.TCP_UDP.value
            shown_url, shown_domains = "", ""
            destination = f"{url}:{port}:PROTOCOL_{protocol}"
        else:
            app_type = AppType.HTTP_HTTPS.value
            shown_url, shown_domains = url, related_domains
            destination = ""

        row = {
            CSVColumns.APP_NAME: app,
            CSVColumns.APP_LOCATION: app_location,
            CSVColumns.APP_TYPE: app_type,
            CSVColumns.URL: shown_url,
            CSVColumns.RELATED_DOMAINS: shown_domains,
            CSVColumns.DESTINATION_PORT_PROTOCOL: destination,
        }
        # Columns this step leaves blank.
        for blank_column in (CSVColumns.ROUTING_TYPE, CSVColumns.RESOURCE_LOCATION,
                             CSVColumns.DESCRIPTION, CSVColumns.CATEGORY):
            row[blank_column] = ""
        # Usage statistics start at zero.
        for counter_column in (CSVColumns.UNIQUE_USERS, CSVColumns.TOTAL_VISITS,
                               CSVColumns.UNIQUE_USERS_PER_APP):
            row[counter_column] = 0
        return row

    def import_csvs_details(self):
        """Append one CSV row to ``self.lb_cs_vservers`` per app in ``CSVsPolicies``.

        For each app the protocol and port come from ``CSVservers`` and the
        URL is the first entry of the app's first policy in ``CSPolicies``.
        """
        for app in CSVsPolicies:
            vserver = CSVservers[app]
            raw_protocol = vserver[0]
            port = vserver[1]
            first_policy = CSVsPolicies[app][0]
            raw_url = CSPolicies[first_policy][0][0]

            related_domains = self._build_related_domains(app)
            protocol, url, app_location = self._determine_protocol_and_location(
                raw_protocol, port, raw_url
            )

            self.lb_cs_vservers.append(
                self._create_csv_row(app, protocol, port, url, app_location, related_domains)
            )

    def match_port(self, port, conf_port):
        """Return True when ``port`` satisfies the configured port spec.

        Args:
            port: Port observed in the traffic (string or int).
            conf_port: Configured port: the wildcard ``'*'``, a single value,
                an inclusive ``"start-end"`` range, or None.

        Returns:
            True on a wildcard, an in-range port, or an exact string match;
            False otherwise. A malformed range or a non-numeric ``port``
            checked against a range yields False instead of raising, so one
            bad config entry cannot abort a caller's lookup loop.
        """
        if conf_port == '*':
            return True

        conf_str = str(conf_port)
        if '-' in conf_str:
            try:
                start_port, end_port = map(int, conf_str.split('-'))
                return start_port <= int(port) <= end_port
            except (TypeError, ValueError):
                # Not exactly two integer bounds, or port is not numeric.
                return False

        return conf_str == str(port)

    def match_protocol(self, protocol, conf_protocol):
        """Return True when ``protocol`` equals ``conf_protocol`` (exact, case-sensitive)."""
        return protocol == conf_protocol

    # Range-matching helpers used by get_most_specific_range_match (defined below).
    # That method returns the most specific match for a given IP, protocol, and port:
    # when several ranges match, the one with the smallest (most specific) range wins;
    # the first matching range is accepted without comparison.
    def _parse_ip_range(self, ip_range):
        """Parse IP range and return start/end octets."""
        try:
            start_ip, end_ip = ip_range.split('-')
            start_octets = [int(x) for x in start_ip.split('.')]
            end_octets = [int(x) for x in end_ip.split('.')]
            return start_octets, end_octets
        except (ValueError, IndexError):
            return None, None

    def _is_ip_in_range(self, ip_octets, start_octets, end_octets):
        """Check if IP is within the specified range."""
        return start_octets <= ip_octets <= end_octets

    def _calculate_range_size(self, start_octets, end_octets):
        """Calculate the size of IP range."""
        try:
            return sum((end_octets[i] - start_octets[i]) * (256 ** (3-i)) for i in range(4))
        except Exception as e:
            logger.error(f"Error calculating range size: {e}")
            return float('inf')  # Return large value for error cases

    def _check_range_match(self, ip, proto, port, ip_range, conf_proto, conf_port):
        """Check if IP, protocol, and port match the range configuration."""
        # Parse IP octets
        try:
            ip_octets = [int(x) for x in ip.split('.')]
        except (ValueError, IndexError):
            return False, None

        # Parse range
        start_octets, end_octets = self._parse_ip_range(ip_range)
        if start_octets is None:
            return False, None

        # Check IP range
        if not self._is_ip_in_range(ip_octets, start_octets, end_octets):
            return False, None

        # Check protocol and port
        is_port_match = self.match_port(port, conf_port)
        is_proto_match = self.match_protocol(proto, conf_proto)
        
        if is_port_match and is_proto_match:
            range_size = self._calculate_range_size(start_octets, end_octets)
            return True, range_size
        
        return False, None

    def get_most_specific_range_match(self, ip, proto, port):
        best_match = None
        best_range = 0
        return_match = None

        for (ip_range, conf_proto, conf_port) in self.range_map:
            try:
                app_name = self.range_map[(ip_range, conf_proto, conf_port)]
                # Only match if app is active
                if app_name not in self._active_apps_set:
                    continue

                is_match, range_size = self._check_range_match(ip, proto, port, ip_range, conf_proto, conf_port)
                if is_match and (best_range == 0 or range_size < best_range):
                    best_match = (ip_range, conf_proto, conf_port)
                    return_match = (ip_range, conf_port, conf_proto)
                    best_range = range_size
                    
            except Exception:
                continue
                
        return (self.range_map[best_match], return_match) if best_match else (None, None)

    def get_longest_prefix_match(self, ip, proto, port):
        best_match = None
        return_match = None
        best_prefix = -1
        
        # Convert input IP to integer once (outside the loop)
        try:
            ip_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(ip.split('.')))
        except (ValueError, IndexError):
            return (None, None)
            
        for (subnet,p,subnet_port) in self.subnet_map:
            # Only match if app is active - O(1) lookup instead of O(n)
            app_name, _ = self.subnet_map[(subnet, p, subnet_port)]
            if app_name not in self._active_apps_set:
                continue
            try:
                # Fast subnet matching using integer bitwise operations
                subnet_ip, prefixlen = subnet.split('/')
                prefixlen = int(prefixlen)
                
                # Convert subnet IP to 32-bit integer
                subnet_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(subnet_ip.split('.')))
                
                # Create subnet mask and check if IP is in subnet
                mask = (0xFFFFFFFF << (32 - prefixlen)) & 0xFFFFFFFF
                if (ip_int & mask) == (subnet_int & mask):
                    is_port_match = NSConfParser.match_port(self, port, subnet_port)
                    is_proto_match = NSConfParser.match_protocol(self, p, proto)
                    if is_port_match and is_proto_match and prefixlen > best_prefix:
                        best_match = (subnet, p, subnet_port)
                        return_match = (subnet, subnet_port, p)
                        best_prefix = prefixlen
            except Exception:
                continue
        return (self.subnet_map[best_match], return_match) if best_match else (None, None)

    def get_lb_vserver_match(self, ip, protocol, port):
        """Return the first LB vserver whose host, protocol and port match.

        Iterates the module-level ``lb_vservers`` mapping, keyed by
        ``(url, protocol, port)``. The host is taken from the URL when it has
        an http/https scheme, otherwise from the text before the first ``:``.
        Protocols are compared case-insensitively.

        Returns:
            ``(name, (host, conf_port, conf_protocol))`` or ``(None, None)``.
        """
        for (url, conf_protocol, conf_port), name in lb_vservers.items():
            if url.startswith((URLConstants.HTTP_SCHEME, URLConstants.HTTPS_SCHEME)):
                parts = urlparse(url)
                host = parts.hostname or parts.netloc.partition(':')[0]
            else:
                # Not a URL: everything before the first colon is the host.
                host = url.partition(':')[0]

            same_protocol = NSConfParser.match_protocol(self, protocol.lower(), conf_protocol.lower())
            same_port = NSConfParser.match_port(self, port, conf_port)
            if same_protocol and same_port and ip == host:
                return (name, (host, conf_port, conf_protocol))

        return (None, None)
        
    def get_subnet_match(self, ip, protocol, port):
        match, subnet_details = NSConfParser.get_longest_prefix_match(self, ip, protocol, port)
        return match, subnet_details

    def get_ip_match(self, ip, protocol, port):
        result = (None, None)
        for (ip_key, conf_protocol, conf_port), app_name in self.ip_map.items():
            port_match = NSConfParser.match_port(self, port, conf_port)
            protocol_match = NSConfParser.match_protocol(self, protocol, conf_protocol)
            if ip_key == ip and port_match and protocol_match:
                result = (app_name, (ip_key, conf_port, conf_protocol))
                break
        return result

    def get_fqdn_match(self, fqdn, protocol, port):
        result = (None, None)
        for (fqdn_key, conf_protocol, conf_port), app_name in self.fqdn_map.items():
            port_match = NSConfParser.match_port(self, port, conf_port)
            protocol_match = NSConfParser.match_protocol(self, protocol, conf_protocol)
            # Match wildcards too in fqdn
            if fqdn_key.startswith("*.") and fqdn.lower().endswith(fqdn_key[1:].lower()) and port_match and protocol_match:
                result = (app_name, (fqdn_key, conf_port, conf_protocol))
                break
            if fqdn_key.lower() == fqdn.lower() and port_match and protocol_match:
                result = (app_name, (fqdn_key, conf_port, conf_protocol))
                break
        return result
    
    def get_dns_suffixes(self):
        """Return ``self.dns_suffixes`` (the stored object itself, not a copy)."""
        return self.dns_suffixes

    def get_vpn_urls(self):
        """Return ``self.vpn_urls`` (the stored object itself, not a copy)."""
        return self.vpn_urls

    def get_active_intranet_apps(self):
        return [a for a in self.intranet_apps if a.is_active]

def _find_matching_regex(line, matched_regex):
    """Find regex that matches the log line."""
    # Fast pre-filter: Only run expensive regex on lines that contain our target keywords
    if not any(keyword in line for keyword in ["HTTPREQUEST", "TCPCONNSTAT", "UDPFLOWSTAT"]):
        return None, 0
    
    if not matched_regex:
        for idx, regex in enumerate(LOG_REGEXES, start=1):
            match = regex.search(line)
            if match:
                return match, idx
        return None, 0
    else:
        regex = LOG_REGEXES[matched_regex - 1]
        return regex.search(line), matched_regex

def _process_log_lines(file_handle, file_name, extracted_data, matched_regex):
    """Scan an open log file and append each matching line's named groups.

    Blank lines are skipped. ``extracted_data`` is extended in place with
    ``match.groupdict()`` for every matching line.

    Returns:
        The regex index in effect after the last line was processed.
    """
    logger.info(f"Extracting file: {file_name}")
    active_regex = matched_regex

    for raw_line in file_handle:
        stripped = raw_line.strip()
        if stripped:
            match, active_regex = _find_matching_regex(stripped, active_regex)
            if match:
                extracted_data.append(match.groupdict())

    return active_regex

def _process_gzipped_file(file_path, file_name, extracted_data, matched_regex):
    """Read a gzip-compressed log file as UTF-8 text (decode errors ignored).

    Returns:
        The regex index from ``_process_log_lines``, or the incoming
        ``matched_regex`` unchanged if anything fails (the error is logged).
    """
    try:
        with gzip.open(file_path, "rt", encoding="utf-8", errors="ignore") as gz_stream:
            logger.info(f"Extracting gzipped file: {file_name}")
            outcome = _process_log_lines(gz_stream, file_name, extracted_data, matched_regex)
    except Exception as ex:
        logger.error(f"Error reading gzipped log file {file_name}: {ex}")
        return matched_regex
    return outcome

def _process_regular_file(file_path, file_name, extracted_data, matched_regex):
    """Read an uncompressed log file as UTF-8 text (decode errors ignored).

    Returns:
        The regex index from ``_process_log_lines``, or the incoming
        ``matched_regex`` unchanged if anything fails (the error is logged).
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as stream:
            outcome = _process_log_lines(stream, file_name, extracted_data, matched_regex)
    except Exception as ex:
        logger.error(f"Error reading log file {file_name}: {ex}")
        return matched_regex
    return outcome

def _process_single_file(file_name, log_dir, extracted_data, matched_regex):
    """Process one log file, choosing the gzip reader for names ending in ``.gz``."""
    file_path = os.path.join(log_dir, file_name)
    logger.info(f"Processing log file: {file_path}")

    reader = _process_gzipped_file if file_name.endswith(".gz") else _process_regular_file
    return reader(file_path, file_name, extracted_data, matched_regex)

def extract_logs():
    """Extract entries from every file in ``LOG_DIR`` whose name starts with ``LOG_PATTERN``.

    Returns:
        ``(extracted_data, matched_regex)``: the list of group dicts and the
        index of the log regex in effect after the last file.
    """
    logger.info(f"Extracting logs from directory: {LOG_DIR}")

    log_files = []
    for entry in os.listdir(LOG_DIR):
        if entry.startswith(LOG_PATTERN):
            log_files.append(entry)
    print(f"Found {len(log_files)} log files to process")

    extracted_data = []
    matched_regex = 0
    for file_name in progress_bar(log_files, desc="Processing log files", unit="files", leave=False):
        matched_regex = _process_single_file(file_name, LOG_DIR, extracted_data, matched_regex)

    logger.info(f"Extracted {len(extracted_data)} log entries.")
    return extracted_data, matched_regex

def is_private_rfc1918(ip_str):
    """Return True when ``ip_str`` parses as an IP address flagged ``is_private``.

    NOTE(review): ``ipaddress``'s ``is_private`` covers more than the three
    RFC 1918 blocks (e.g. loopback and link-local); confirm callers are fine
    with that broader meaning. Unparseable input yields False.
    """
    try:
        parsed = ipaddress.ip_address(ip_str)
    except Exception:
        return False
    return parsed.is_private

# Fast address type classification to replace expensive classify_address_type function
def is_ip_fast(s):
    """Cheap dotted-quad check: four ``.``-separated integers, each in 0..255."""
    octets = str(s).split('.')
    if len(octets) != 4:
        return False
    try:
        for octet in octets:
            if not 0 <= int(octet) <= 255:
                return False
    except (ValueError, TypeError):
        return False
    return True

def is_fqdn_fast(s):
    """Cheap FQDN check: the text contains a dot and is not a dotted-quad IP."""
    text = str(s)
    if '.' not in text:
        return False
    return not is_ip_fast(text)

def is_hostname_fast(s):
    """Cheap hostname check: no dots, and alphanumeric once ``-`` and ``_`` are removed."""
    text = str(s)
    if '.' in text:
        return False
    return text.replace('-', '').replace('_', '').isalnum()

def classify_address_type(s):
    """Classify ``s`` as ``'ip'``, ``'fqdn'``, ``'hostname'`` or ``'unknown'``.

    Anything ``ipaddress.ip_address`` accepts is ``'ip'``. Otherwise a name
    made of dot-terminated labels followed by an alphabetic TLD of 2-63
    characters is ``'fqdn'``; a name whose dot-separated labels are each 1-63
    letters/digits/hyphens without a leading or trailing hyphen is
    ``'hostname'``. Errors while matching are logged and reported as
    ``'unknown'``.
    """
    try:
        ipaddress.ip_address(s)
    except Exception:
        pass
    else:
        return 'ip'

    try:
        if re.match(r"^(?=.{1,253}$)(?:[a-zA-Z0-9-]{1,63}\.)+[a-zA-Z]{2,63}$", s):
            return 'fqdn'
        labels = s.split('.')
        if all(re.match(r"^(?!-)[A-Za-z0-9-]{1,63}(?<!-)$", label) for label in labels):
            return 'hostname'
        logger.debug(f"Address is unknown type: {s}")
        return 'unknown'
    except Exception as ex:
        logger.error(f"Error classifying address type for {s}: {ex}")
        return 'unknown'

def get_ip_class(ip):
    """Return the ``IpClass`` value for an IPv4 address based on its first octet.

    1-126 is class A, 128-191 class B, 192-223 class C; anything else, or any
    parsing failure, is ``IpClass.UNKNOWN``.
    """
    try:
        lead_octet = int(ip.split(".")[0])
        class_bounds = (
            (1, 126, IpClass.CLASS_A),
            (128, 191, IpClass.CLASS_B),
            (192, 223, IpClass.CLASS_C),
        )
        for low, high, ip_class in class_bounds:
            if low <= lead_octet <= high:
                return ip_class.value
        logger.debug(f"IP class Unknown: {ip}")
        return IpClass.UNKNOWN.value
    except Exception as ex:
        logger.error(f"Error getting IP class for {ip}: {ex}")
        return IpClass.UNKNOWN.value

def get_subnet(ip, ip_class):
    """Return the summary subnet string for ``ip`` given its class letter.

    Classes "A" and "B" both map to ``x.y.0.0/16``, class "C" maps to
    ``x.y.z.0/24``, and anything else (or any error) yields "Unknown".
    NOTE(review): class A using /16 rather than /8 mirrors the existing
    behaviour; confirm it is intentional.
    """
    try:
        octets = ip.split(".")
        if ip_class in ("A", "B"):
            subnet = f"{octets[0]}.{octets[1]}.0.0/16"
        elif ip_class == "C":
            subnet = f"{octets[0]}.{octets[1]}.{octets[2]}.0/24"
        else:
            subnet = "Unknown"
        logger.debug(f"Subnet for IP {ip}: {subnet}")
        return subnet
    except Exception as ex:
        logger.error(f"Error getting subnet for IP {ip}, class {ip_class}: {ex}")
        return "Unknown"

def is_valid_ip(ip):
    """Return True when ``ipaddress.ip_address`` accepts ``ip``."""
    try:
        ipaddress.ip_address(ip)
    except Exception:
        return False
    else:
        return True
    
def is_valid_fqdn(dest):
    """Return True for an FQDN, or for a ``*.`` wildcard with at least three dot-separated parts.

    Any error while checking is treated as "not an FQDN".
    """
    try:
        is_plain_fqdn = classify_address_type(dest) == 'fqdn'
        return is_plain_fqdn or (dest.startswith("*.") and len(dest.split(".")) >= 3)
    except Exception:
        return False

def is_valid_hostname(dest):
    """Return True when ``classify_address_type`` labels ``dest`` a hostname; False on any error."""
    try:
        return classify_address_type(dest) == 'hostname'
    except Exception:
        return False


# Helper function to update user/app stats
def _process_app_name_stats(app_name, user, key):
    """Process app name statistics."""
    if not app_name:
        return None
    
    # Apply app name occurrence limiting
    destination = key if app_name == "uncategorized_apps" else None
    final_app_name = get_limited_app_name(app_name, destination)
    app_unique_users[final_app_name].add(user)
    return final_app_name

def _build_element_key(final_app_name, url, key):
    """Build element key for unique users tracking."""
    el = url or key
    return final_app_name + ("_" + el) if el else final_app_name

def _update_visit_stats_with_time(el, user, visit_time, final_app_name):
    """Count a visit only if the user's last counted visit to ``el`` is old enough.

    A visit is counted when there is no recorded visit for ``(el, user)`` or
    when more than ``TimeConstants.VISIT_TIME_INTERVAL`` minutes have passed
    since it; the recorded time is then moved to ``visit_time``.
    """
    visit_key = (el, user)
    previous = app_user_last_visit.get(visit_key)
    if previous and not (visit_time - previous) > timedelta(minutes=TimeConstants.VISIT_TIME_INTERVAL):
        # Still within the same visit window: nothing to count.
        return

    total_visits[el] += 1
    app_user_last_visit[visit_key] = visit_time
    if final_app_name:
        app_total_visits[final_app_name] += 1

def _update_visit_stats_no_time(el, final_app_name):
    """Update visit statistics when no visit_time is available."""
    if el:
        total_visits[el] += 1
    if final_app_name:
        app_total_visits[final_app_name] += 1

def _update_app_name_mapping(app_name_map, key, app_name, details):
    """Update app_name_map and maintain reverse mapping for destination count per app."""
    # Only count if this is a new mapping
    if key not in app_name_map:
        # Check if we need to apply the 100-destination limit (only for TCP/UDP apps)
        final_app_name = _get_app_name_with_suffix(app_name)
        
        # Update reverse mapping - count unique destinations per app (only for TCP/UDP apps)
        if "_tcp_udp" in app_name:
            app_destination_count[final_app_name] += 1
        
        # Update the main mapping with the potentially suffixed app name
        app_name_map[key] = (final_app_name, details)
    else:
        # Update the main mapping without changing counts
        app_name_map[key] = (app_name, details)

def _get_app_name_with_suffix(original_app_name):
    """
    Get app name with suffix if it exceeds 100 destinations limit.
    Only applies to TCP/UDP apps (those containing '_tcp_udp').
    Returns the appropriate app name with suffix (_1, _2, etc.) or original name.
    """
    # Don't apply suffix logic for uncategorized_apps as they have their own unique naming
    if original_app_name == "uncategorized_apps":
        return original_app_name
    
    # Only apply 100-destination limit to TCP/UDP apps
    if "_tcp_udp" not in original_app_name:
        return original_app_name
    
    # Check current destination count for the base app name
    current_count = app_destination_count.get(original_app_name, 0)
    
    # If under limit, use original name
    if current_count < 100:
        return original_app_name
    
    # Find the next available suffix
    suffix_num = 1
    while True:
        suffixed_name = f"{original_app_name}_{suffix_num}"
        suffixed_count = app_destination_count.get(suffixed_name, 0)
        
        # If this suffixed name has room for more destinations, use it
        if suffixed_count < 100:
            return suffixed_name
        
        # Otherwise, try the next suffix
        suffix_num += 1
        
        # Safety check to avoid infinite loop (though unlikely in practice)
        if suffix_num > 1000:
            logger.warning(f"App {original_app_name} has exceeded 100,000 destinations, using suffix _{suffix_num}")
            return f"{original_app_name}_{suffix_num}"

def update_user_app_stats(app_name, user, url=None, key=None, visit_time=None):
    """
    Record one access by ``user`` to ``app_name`` in the usage counters.

    Does nothing when no final app name can be resolved. Otherwise the user
    is added to ``unique_users`` for the element key (app name plus ``url``
    or ``key``), and a visit is counted: time-window based when
    ``visit_time`` is given, unconditionally when it is not.
    """
    final_app_name = _process_app_name_stats(app_name, user, key)
    if not final_app_name:
        return

    el = _build_element_key(final_app_name, url, key)
    if el:
        unique_users[el].add(user)

    if not visit_time:
        _update_visit_stats_no_time(el, final_app_name)
    elif el:
        _update_visit_stats_with_time(el, user, visit_time, final_app_name)


def get_limited_app_name(original_app_name, destination=None):
    """
    Return the app name to aggregate under.

    "uncategorized_apps" with a destination becomes
    ``uncategorized_apps_<destination>`` with every ':' and '/' in the
    destination replaced by '_'. Every other name is returned unchanged.
    """
    if original_app_name != "uncategorized_apps" or not destination:
        return original_app_name

    sanitized = destination.translate(str.maketrans({':': '_', '/': '_'}))
    return f"uncategorized_apps_{sanitized}"


def parse_nslog_timestamp(syslog_prefix, timestamp_str):
    """
    Parse ``timestamp_str``, using the syslog prefix to tell dd/mm from mm/dd.

    The month abbreviation at the start of ``syslog_prefix`` is compared with
    the first two numbers of the timestamp: whichever equals the month number
    fixes the format (mm/dd wins when both do). When neither matches, dd/mm
    is tried first and mm/dd second.

    Example:
        syslog_prefix: 'Aug 7 08:00:00'
        timestamp_str: '08/07/2024:08:00:00 GMT'
    Returns:
        datetime object, or None when either input is too short or parsing
        fails (failures are logged as warnings).
    """
    month_first = "%m/%d/%Y:%H:%M:%S GMT"
    day_first = "%d/%m/%Y:%H:%M:%S GMT"
    try:
        prefix_fields = syslog_prefix.split()
        if len(prefix_fields) < 2:
            return None
        month_num = list(month_abbr).index(prefix_fields[0][:3])
        int(prefix_fields[1])  # validation only: the day must be numeric

        date_fields = timestamp_str.split(":")[0].split("/")
        if len(date_fields) < 3:
            return None
        first = int(date_fields[0])
        second = int(date_fields[1])
        int(date_fields[2])  # validation only: the year must be numeric

        if first == month_num:
            return datetime.strptime(timestamp_str, month_first)
        if second == month_num:
            return datetime.strptime(timestamp_str, day_first)

        # Neither number equals the syslog month: try both layouts.
        try:
            return datetime.strptime(timestamp_str, day_first)
        except Exception:
            return datetime.strptime(timestamp_str, month_first)
    except Exception as ex:
        logger.warning(f"Ambiguous timestamp parsing failed for '{syslog_prefix}' and '{timestamp_str}': {ex}")
        return None

def _format_details(details):
    """Format details tuple as string."""
    if details and isinstance(details, tuple):
        return ":".join(str(x) for x in details if x is not None)
    return details

def _handle_ip_match(ip_match, ip_details, is_private_ip, is_fqdn, dest_key, user, visit_time, app_name_map):
    """Handle IP match result."""
    if not ip_match:
        return False
    
    # Check if this destination is already mapped to prevent duplicates
    if dest_key in app_name_map:
        existing_app_name, existing_key = app_name_map[dest_key]
        update_user_app_stats(existing_app_name, user, key=existing_key, visit_time=visit_time)
        return True
    
    formatted_details = _format_details(ip_details)
    
    if is_private_ip or is_fqdn:
        _update_app_name_mapping(app_name_map, dest_key, ip_match, formatted_details)
        update_user_app_stats(ip_match, user, key=formatted_details, visit_time=visit_time)
        return True
    elif not is_private_ip and not is_fqdn:
        _update_app_name_mapping(app_name_map, dest_key, "uncategorized_apps", formatted_details)
        update_user_app_stats("uncategorized_apps", user, key=formatted_details, visit_time=visit_time)
        return True
    
    return False

def _handle_fqdn_match(nsconf_parser, destination_ip, protocol, destination_port, dest_key, user, visit_time, app_name_map):
    """Handle FQDN match."""
    fqdn_match, fqdn_details = nsconf_parser.get_fqdn_match(destination_ip, protocol, destination_port)
    if fqdn_match:
        # Check if this destination is already mapped to prevent duplicates
        if dest_key in app_name_map:
            existing_app_name, existing_key = app_name_map[dest_key]
            update_user_app_stats(existing_app_name, user, key=existing_key, visit_time=visit_time)
            return True
            
        formatted_details = _format_details(fqdn_details)
        _update_app_name_mapping(app_name_map, dest_key, fqdn_match, formatted_details)
        update_user_app_stats(fqdn_match, user, key=formatted_details, visit_time=visit_time)
        return True
    return False

def _handle_lb_vserver_match(nsconf_parser, destination_ip, protocol, destination_port, dest_key, user, visit_time, app_name_map):
    """Handle LB vserver match."""
    lb_vserver_match, lb_vserver_details = nsconf_parser.get_lb_vserver_match(destination_ip, protocol, destination_port)
    if not lb_vserver_match:
        return False
    
    formatted_details = _format_details(lb_vserver_details)
    
    if protocol in (ProtocolLiterals.HTTPS, ProtocolLiterals.HTTP):
        url = f"{protocol.lower()}://{destination_ip}"
        # Check if this URL is already mapped to prevent duplicates
        if url in app_name_map:
            existing_app_name, existing_key = app_name_map[url]
            update_user_app_stats(existing_app_name, user, key=existing_key, visit_time=visit_time)
        else:
            _update_app_name_mapping(app_name_map, url, lb_vserver_match, url)
            update_user_app_stats(lb_vserver_match, user, key=url, visit_time=visit_time)
    else:
        # Check if this destination is already mapped to prevent duplicates
        if dest_key in app_name_map:
            existing_app_name, existing_key = app_name_map[dest_key]
            update_user_app_stats(existing_app_name, user, key=existing_key, visit_time=visit_time)
        else:
            _update_app_name_mapping(app_name_map, dest_key, lb_vserver_match, formatted_details)
            update_user_app_stats(lb_vserver_match, user, key=formatted_details, visit_time=visit_time)
    
    return True

def _clean_subnet_details(subnet_details):
    """Clean CIDR notation from subnet details."""
    if ':' in subnet_details and '/' in subnet_details:
        parts = subnet_details.split(':')
        cleaned_parts = []
        for part in parts:
            if '/' in part and any(c.isdigit() for c in part.split('/')[0]):
                cleaned_parts.append(part.split('/')[0])
            else:
                cleaned_parts.append(part)
        return ':'.join(cleaned_parts)
    return subnet_details

def _handle_range_subnet_matches(nsconf_parser, destination_ip, protocol, destination_port, is_private_ip, dest_key, user, visit_time, app_name_map):
    """Handle range and subnet matches for non-FQDN destinations."""
    # Check if this destination is already mapped to prevent duplicates
    if dest_key in app_name_map:
        existing_app_name, existing_key = app_name_map[dest_key]
        update_user_app_stats(existing_app_name, user, key=existing_key, visit_time=visit_time)
        return True
        
    # Check range match
    range_match, range_details = nsconf_parser.get_most_specific_range_match(destination_ip, protocol, destination_port)
    if range_match:
        formatted_details = _format_details(range_details)
        app_name = range_match if is_private_ip else "uncategorized_apps"
        _update_app_name_mapping(app_name_map, dest_key, app_name, formatted_details)
        update_user_app_stats(app_name, user, key=formatted_details, visit_time=visit_time)
        return True
    
    # Check subnet match
    subnet_match, subnet_details = nsconf_parser.get_subnet_match(destination_ip, protocol, destination_port)
    if subnet_match:
        formatted_details = _format_details(subnet_details)
        if formatted_details:
            formatted_details = _clean_subnet_details(formatted_details)
        
        app_name = subnet_match[0] if is_private_ip else "uncategorized_apps"
        _update_app_name_mapping(app_name_map, dest_key, app_name, formatted_details)
        update_user_app_stats(app_name, user, key=formatted_details, visit_time=visit_time)
        return True
    
    return False

def match_and_update_app(destination_ip, protocol, destination_port, user, visit_time, nsconf_parser, app_name_map, is_fqdn=False, is_private_ip=False):
    """Match a destination against the parsed config and record usage stats.

    Lookups run in this order and stop at the first hit: exact IP, FQDN
    (only when ``is_fqdn``), LB vserver, then IP range / subnet (only when
    not ``is_fqdn``). Before the LB vserver lookup the protocol is forced to
    HTTPS for port '443' and HTTP for port '80'; the map key keeps the
    protocol that was passed in.

    Returns:
        True when some lookup matched and stats were updated, else False.
    """
    # Built once, from the protocol as originally passed in.
    dest_key = f"{destination_ip}:{destination_port}:{protocol}"

    ip_match, ip_details = nsconf_parser.get_ip_match(destination_ip, protocol, destination_port)
    if _handle_ip_match(ip_match, ip_details, is_private_ip, is_fqdn, dest_key, user, visit_time, app_name_map):
        return True

    if is_fqdn:
        if _handle_fqdn_match(nsconf_parser, destination_ip, protocol, destination_port, dest_key, user, visit_time, app_name_map):
            return True

    if destination_port in ('443', '80'):
        protocol = ProtocolLiterals.HTTPS if destination_port == '443' else ProtocolLiterals.HTTP

    if _handle_lb_vserver_match(nsconf_parser, destination_ip, protocol, destination_port, dest_key, user, visit_time, app_name_map):
        return True

    if is_fqdn:
        return False
    return _handle_range_subnet_matches(nsconf_parser, destination_ip, protocol, destination_port, is_private_ip, dest_key, user, visit_time, app_name_map)

def _parse_visit_time(syslog_prefix, timestamp, matched_regex):
    """Parse visit time from log entry."""
    try:
        if matched_regex == 1:
            return parse_nslog_timestamp(syslog_prefix, timestamp)
        elif matched_regex == 2:
            return parse_nslog_timestamp(syslog_prefix, timestamp + " GMT")
        elif matched_regex == 3:
            date_part, time_part = timestamp.split(":", 1)
            year, day, month = date_part.split("/")
            rearranged_timestamp = f"{month}/{day}/{year}:{time_part}"
            return parse_nslog_timestamp(syslog_prefix, rearranged_timestamp)
    except Exception as ex:
        logger.warning(f"Error parsing timestamp: {timestamp}, Exception: {ex}")
    return None

def _should_skip_log(destination_ip, destination_port):
    """Check if log should be skipped based on destination."""
    LOOPBACK_IP = "127.0.0.1"
    DNS_PORT = "53"
    ZERO_PORT = "0"
    
    return (destination_ip == LOOPBACK_IP or 
            destination_port == DNS_PORT or 
            destination_port == ZERO_PORT)

def _normalize_protocol(protocol, http_val):
    """Map a log type to a protocol value.

    TCPCONNSTAT maps to TCP and UDPFLOWSTAT to UDP. Any other log type maps
    to HTTPS when ``http_val`` is set and to "UNKNOWN" otherwise.
    """
    transport_by_log_type = (
        (LogType.TCPCONNSTAT, Protocol.TCP),
        (LogType.UDPFLOWSTAT, Protocol.UDP),
    )
    for log_type, transport in transport_by_log_type:
        if protocol == log_type.value:
            return transport.value
    return Protocol.HTTPS.value if http_val else "UNKNOWN"

def _process_ip_http_val(http_val, vpn_urls, user, visit_time, nsconf_parser, app_name_map):
    """Derive application details for an HTTP value that is an IP address.

    Resolution order:
      1. The first VPN URL whose text contains the IP (substring test).
      2. A configured app found by match_and_update_app on port 443/HTTPS.
      3. A generated "<ip>_https_saas" (public) or "<ip>_https_web"
         (private) application.

    Returns a 6-tuple (app_name, app_location, app_type, url,
    related_domains, ""). When step 2 matches, all six slots are None,
    which the caller treats as "nothing more to record".
    """
    ip_text = str(http_val)
    url = URLConstants.HTTPS_SCHEME + ip_text
    # Prefer the parser's domain pattern; fall back to a wildcard on the IP.
    related_domains = nsconf_parser.get_related_domains(ip_text) or f"*.{http_val}"

    # 1. VPN URL containing this IP.
    for vpn_name, vpn_url in vpn_urls:
        if ip_text not in vpn_url:
            continue
        if is_private_rfc1918(http_val):
            location = AppLocation.INSIDE.value
        else:
            location = AppLocation.OUTSIDE.value
        _update_app_name_mapping(app_name_map, vpn_url, vpn_name, vpn_url)
        update_user_app_stats(vpn_name, user, url=vpn_url, key=None, visit_time=visit_time)
        vpn_domains = nsconf_parser.get_related_domains(vpn_url) or related_domains
        return vpn_name, location, AppType.HTTP_HTTPS.value, vpn_url, vpn_domains, ""

    # 2. Configured application.
    private = is_private_rfc1918(http_val)
    already_handled = match_and_update_app(
        http_val, Protocol.HTTPS.value, '443', user, visit_time,
        nsconf_parser, app_name_map, is_fqdn=False, is_private_ip=private,
    )
    if already_handled:
        return None, None, None, None, None, None

    # 3. Generated fallback, depending on whether the IP is private.
    if private:
        app_name = f"{str(http_val).replace('/', '_')}_https_web"
        app_location = AppLocation.INSIDE.value
        app_type = AppType.HTTP_HTTPS.value
    else:
        app_name = f"{http_val}_https_saas"
        app_location = AppLocation.OUTSIDE.value
        app_type = AppType.SAAS.value

    _update_app_name_mapping(app_name_map, url, app_name, url)
    # Read back the stored name; the mapping helper may have altered it.
    final_app_name, _ = app_name_map[url]
    update_user_app_stats(final_app_name, user, url=url, key=None, visit_time=visit_time)
    return final_app_name, app_location, app_type, url, related_domains, ""

def _process_fqdn_http_val(http_val, vpn_urls, user, visit_time, nsconf_parser, app_name_map):
    """Derive application details for an HTTP value that is an FQDN.

    Resolution order:
      1. The first VPN URL whose text contains the FQDN (reported INSIDE).
      2. A configured app found by match_and_update_app on port 443/HTTPS.
      3. A generated app named after the FQDN with dots replaced by
         underscores (reported OUTSIDE).

    Returns a 6-tuple (app_name, app_location, app_type, url,
    related_domains, ""), or six Nones when step 2 matches.
    """
    fqdn = str(http_val)
    url = URLConstants.HTTPS_SCHEME + fqdn
    # Prefer the parser's domain pattern; fall back to a wildcard on the FQDN.
    related_domains = nsconf_parser.get_related_domains(fqdn) or f"*.{fqdn}"

    # 1. VPN URL containing this FQDN.
    for vpn_name, vpn_url in vpn_urls:
        if fqdn not in vpn_url:
            continue
        _update_app_name_mapping(app_name_map, vpn_url, vpn_name, vpn_url)
        update_user_app_stats(vpn_name, user, url=vpn_url, key=None, visit_time=visit_time)
        vpn_domains = nsconf_parser.get_related_domains(vpn_url) or related_domains
        return vpn_name, AppLocation.INSIDE.value, AppType.HTTP_HTTPS.value, vpn_url, vpn_domains, ""

    # 2. Configured application.
    already_handled = match_and_update_app(
        http_val, Protocol.HTTPS.value, '443', user, visit_time,
        nsconf_parser, app_name_map, is_fqdn=True, is_private_ip=False,
    )
    if already_handled:
        return None, None, None, None, None, None

    # 3. Generated application named after the FQDN.
    generated_name = http_val.replace(".", "_")
    _update_app_name_mapping(app_name_map, url, generated_name, url)
    # Read back the stored name; the mapping helper may have altered it.
    final_app_name, _ = app_name_map[url]
    update_user_app_stats(final_app_name, user, url=url, key=None, visit_time=visit_time)
    return final_app_name, AppLocation.OUTSIDE.value, AppType.HTTP_HTTPS.value, url, related_domains, ""

def _process_unknown_http_val(http_val, user, visit_time, app_name_map, nsconf_parser):
    """Derive application details for an HTTP value of unrecognized form.

    The host part is whatever precedes the first ':' and, within that, the
    first '/'. It drives the app name (dots become underscores) and the
    related-domain pattern, while the URL column keeps the value verbatim.

    Returns (app_name, OUTSIDE, HTTP_HTTPS, url, related_domains, "").
    """
    url = str(http_val)
    # Strip any port first, then any path, leaving only the host text.
    host = url.partition(':')[0].partition('/')[0]

    related_domains = nsconf_parser.get_related_domains(host) or f"*.{host}"
    _update_app_name_mapping(app_name_map, url, host.replace(".", "_"), url)
    # Read back the stored name; the mapping helper may have altered it.
    final_app_name, _ = app_name_map[url]
    update_user_app_stats(final_app_name, user, url=url, key=None, visit_time=visit_time)
    return final_app_name, AppLocation.OUTSIDE.value, AppType.HTTP_HTTPS.value, url, related_domains, ""

def _process_destination_ip_fallback(destination_ip, destination_port, user, visit_time, app_name_map, nsconf_parser):
    """Generate an application from the destination IP for ports 443 and 80.

    Port "443" yields an https URL and port "80" an http URL; any other
    port returns six Nones. Public IPs are recorded as OUTSIDE SaaS apps
    ("..._saas"), private ones as INSIDE web apps ("..._web").

    Returns (app_name, app_location, app_type, url, related_domains, "").
    """
    https_scheme = URLConstants.HTTPS_SCHEME
    http_scheme = URLConstants.HTTP_SCHEME

    private = is_private_rfc1918(destination_ip)
    flavor = 'web' if private else 'saas'

    if destination_port == '443':
        app_name = f"{destination_ip}_https_{flavor}"
        url = https_scheme + destination_ip
    elif destination_port == '80':
        app_name = f"{destination_ip}_http_{flavor}"
        url = http_scheme + destination_ip
    else:
        return None, None, None, None, None, None

    if private:
        app_location = AppLocation.INSIDE.value
        app_type = AppType.HTTP_HTTPS.value
    else:
        app_location = AppLocation.OUTSIDE.value
        app_type = AppType.SAAS.value

    # Prefer the parser's domain pattern; fall back to a wildcard on the IP.
    related_domains = nsconf_parser.get_related_domains(str(destination_ip)) or f"*.{destination_ip}"
    _update_app_name_mapping(app_name_map, url, app_name, url)
    # Read back the stored name; the mapping helper may have altered it.
    final_app_name, _ = app_name_map[url]
    update_user_app_stats(final_app_name, user, url=url, key=None, visit_time=visit_time)
    return final_app_name, app_location, app_type, url, related_domains, ""

def _process_http_https_protocol(log, protocol, destination_ip, destination_port, user, visit_time, vpn_urls, nsconf_parser, app_name_map):
    """Route an HTTP/HTTPS log entry to the handler for its address kind.

    When the entry has an "http_res" value it is classified as IP, FQDN,
    bare hostname or unknown, in that order. A bare hostname is not
    handled here: the function returns five Nones followed by True so the
    caller can run DNS-suffix processing. Without "http_res" the
    destination IP is matched against configured apps and then falls back
    to a generated app.

    Returns the 6-tuple produced by the selected handler; six Nones mean
    the entry needs no further processing.
    """
    http_val = log.get("http_res", "")

    if not http_val:
        # No HTTP value: work from the destination IP instead.
        private = is_private_rfc1918(destination_ip)
        if match_and_update_app(destination_ip, protocol, destination_port, user, visit_time, nsconf_parser, app_name_map, is_fqdn=False, is_private_ip=private):
            return None, None, None, None, None, None
        return _process_destination_ip_fallback(destination_ip, destination_port, user, visit_time, app_name_map, nsconf_parser)

    address = str(http_val)
    if is_ip_fast(address):
        return _process_ip_http_val(http_val, vpn_urls, user, visit_time, nsconf_parser, app_name_map)
    if is_fqdn_fast(address):
        return _process_fqdn_http_val(http_val, vpn_urls, user, visit_time, nsconf_parser, app_name_map)
    if is_hostname_fast(address):
        # Trailing True asks the caller to run DNS-suffix processing.
        return None, None, None, None, None, True
    return _process_unknown_http_val(http_val, user, visit_time, app_name_map, nsconf_parser)

def _process_tcp_udp_protocol(destination_ip, protocol, destination_port, user, visit_time, nsconf_parser, app_name_map):
    """Derive application details for a plain TCP/UDP log entry.

    Resolution order:
      1. A configured app found by match_and_update_app (returns six Nones).
      2. An app already recorded in app_name_map under "ip:port:protocol".
      3. A generated app: "uncategorized_apps" for public IPs, an
         "hdx_resources_<subnet>" app for private IPs on ports 1494/2598,
         and "<subnet>_tcp_udp" for every other private destination.

    Returns (app_name, app_location, TCP_UDP, "", "", "").
    """
    private = is_private_rfc1918(destination_ip)
    if match_and_update_app(destination_ip, protocol, destination_port, user, visit_time, nsconf_parser, app_name_map, is_fqdn=False, is_private_ip=private):
        return None, None, None, None, None, None

    key = f"{destination_ip}:{destination_port}:{protocol}"

    # Reuse an existing mapping so one destination never yields two apps.
    if key in app_name_map:
        known_name, known_key = app_name_map[key]
        update_user_app_stats(known_name, user, key=known_key, visit_time=visit_time)
        if is_private_rfc1918(destination_ip):
            known_location = AppLocation.INSIDE.value
        else:
            known_location = AppLocation.OUTSIDE.value
        return known_name, known_location, AppType.TCP_UDP.value, "", "", ""

    # First sighting of this destination: generate a name for it.
    if private:
        subnet = get_subnet(destination_ip, get_ip_class(destination_ip))
        subnet_tag = subnet.replace('/', '_')
        if destination_port in ['1494', '2598']:
            base_app_name = f"hdx_resources_{subnet_tag}"
        else:
            base_app_name = f"{subnet_tag}_tcp_udp"
        app_name = get_limited_app_name(base_app_name, key)
        app_location = AppLocation.INSIDE.value
    else:
        app_name = "uncategorized_apps"
        app_location = AppLocation.OUTSIDE.value

    _update_app_name_mapping(app_name_map, key, app_name, key)
    # Read back the stored name; the mapping helper may have altered it.
    final_app_name, _ = app_name_map[key]
    update_user_app_stats(final_app_name, user, key=key, visit_time=visit_time)
    return final_app_name, app_location, AppType.TCP_UDP.value, "", "", ""

def _process_hostname_dns_suffixes(http_val, dns_suffixes, user, visit_time, app_name_map, nsconf_parser):
    """Build application rows for a bare hostname using the DNS suffix.

    An application is only created when exactly one DNS suffix is
    configured; with none or several the hostname is skipped and the
    reason is logged.

    Args:
        http_val: Hostname text from the log, possibly followed by a port
            and/or path, which are stripped.
        dns_suffixes: Configured DNS suffixes.
        user: User the visit is attributed to.
        visit_time: Parsed visit time passed on to the statistics helper.
        app_name_map: Mapping updated with the generated application.
        nsconf_parser: Parser used to look up related-domain patterns.

    Returns:
        A list of (app_name, user, row) tuples; empty when skipped.
    """
    rows = []
    if len(dns_suffixes) != 1:
        # Report the real reason: the old message claimed "multiple"
        # suffixes even when none were configured at all.
        if len(dns_suffixes) == 0:
            logger.info(f"Skipping creating application for hostname {http_val} as no DNS suffix is configured")
        else:
            logger.info(f"Skipping creating application for hostname {http_val} due to multiple DNS suffixes")
        return rows

    # Keep only the host text: drop any port first, then any path.
    hostname = http_val.partition(':')[0].partition('/')[0]

    for suffix in dns_suffixes:
        base_app_name = f"{hostname.replace('.', '_')}_{suffix.replace('.', '_')}_dns_suffix_app"
        app_name = get_limited_app_name(base_app_name)
        url = f"{URLConstants.HTTPS_SCHEME}{hostname}.{suffix}"
        # Prefer the parser's domain pattern; fall back to a wildcard on the suffix.
        related_domains = nsconf_parser.get_related_domains(suffix) or f'*.{suffix}'

        update_user_app_stats(app_name, user, url=url, key=None, visit_time=visit_time)

        row = {
            CSVColumns.APP_NAME: app_name,
            CSVColumns.APP_LOCATION: AppLocation.OUTSIDE.value,
            CSVColumns.APP_TYPE: AppType.HTTP_HTTPS.value,
            CSVColumns.URL: url,
            CSVColumns.RELATED_DOMAINS: related_domains,
            CSVColumns.DESTINATION_PORT_PROTOCOL: "",
            CSVColumns.ROUTING_TYPE: "",
            CSVColumns.RESOURCE_LOCATION: "",
            CSVColumns.DESCRIPTION: "",
            CSVColumns.CATEGORY: "",
            CSVColumns.UNIQUE_USERS: 0,
            CSVColumns.TOTAL_VISITS: 0,
            CSVColumns.UNIQUE_USERS_PER_APP: 0,
        }
        # These rows are keyed by the app name alone.
        rows.append((app_name, user, row))
        # NOTE(review): url already ends with the suffix, so this map key
        # carries the suffix twice - confirm whether that is intended.
        _update_app_name_mapping(app_name_map, url + "." + suffix, app_name, url)

    return rows

def _create_row_data(app_name, app_location, app_type, url, related_domains, destination_ip, destination_port, protocol):
    """Assemble the CSV row dictionary for one application.

    The destination/port/protocol column is filled with "ip:port:protocol"
    only for TCP/UDP apps and left empty otherwise. Routing, resource
    location, description and category start empty and the three
    statistics columns start at 0.
    """
    if app_type == AppType.TCP_UDP.value:
        endpoint = f"{destination_ip}:{destination_port}:{protocol}"
    else:
        endpoint = ""

    row = {
        CSVColumns.APP_NAME: app_name,
        CSVColumns.APP_LOCATION: app_location,
        CSVColumns.APP_TYPE: app_type,
        CSVColumns.URL: url,
        CSVColumns.RELATED_DOMAINS: related_domains,
        CSVColumns.DESTINATION_PORT_PROTOCOL: endpoint,
        CSVColumns.ROUTING_TYPE: "",
        CSVColumns.RESOURCE_LOCATION: "",
        CSVColumns.DESCRIPTION: "",
        CSVColumns.CATEGORY: "",
        CSVColumns.UNIQUE_USERS: 0,
        CSVColumns.TOTAL_VISITS: 0,
        CSVColumns.UNIQUE_USERS_PER_APP: 0,
    }
    return row

def _process_single_log(log, matched_regex, dns_suffixes, vpn_urls, nsconf_parser, app_name_map, rows):
    """Classify one log entry and append the resulting row(s) to rows.

    Entries rejected by _should_skip_log are ignored. An entry is handled
    as HTTP/HTTPS when it is TCP on port 80/443, when its protocol is
    HTTPS, or when it carries an "http_res" value; everything else goes
    through the TCP/UDP path. Nothing is appended when the handler
    reports that the entry was already accounted for.
    """
    destination_ip = log.get("destination_ip", "")
    destination_port = log.get("destination_port", "")
    if _should_skip_log(destination_ip, destination_port):
        return

    http_val = log.get("http_res", "")
    protocol = _normalize_protocol(log.get("protocol", ""), http_val)
    user = log.get("user_tcp") or log.get("user_http") or "Unknown"
    syslog_prefix = log.get("syslog_prefix_tcp_udp") or log.get("syslog_prefix_http") or "Unknown"
    timestamp = log.get("timestamp_tcp_udp") or log.get("timestamp_http") or ""
    visit_time = _parse_visit_time(syslog_prefix, timestamp, matched_regex)

    treat_as_http = (
        (protocol == Protocol.TCP.value and destination_port in ['80', '443'])
        or protocol == ProtocolLiterals.HTTPS
        or http_val
    )

    if treat_as_http:
        outcome = _process_http_https_protocol(
            log, protocol, destination_ip, destination_port,
            user, visit_time, vpn_urls, nsconf_parser, app_name_map,
        )
        if outcome[0] is None and outcome[5] is None:
            # Already accounted for (or nothing to record).
            return
        if outcome[5] is True:
            # Bare hostname: expand it through the DNS suffix instead.
            rows.extend(
                _process_hostname_dns_suffixes(
                    http_val, dns_suffixes, user, visit_time, app_name_map, nsconf_parser
                )
            )
            return
    else:
        outcome = _process_tcp_udp_protocol(
            destination_ip, protocol, destination_port,
            user, visit_time, nsconf_parser, app_name_map,
        )
        if outcome[0] is None:
            # Already accounted for by a configured application.
            return

    app_name, app_location, app_type, url, related_domains, _ = outcome

    # The key decides which rows collapse together during aggregation.
    if app_type != AppType.TCP_UDP.value:
        app_key = (app_name, app_type, url, related_domains)
    else:
        app_key = (app_name, app_type, f"{destination_ip}:{destination_port}:{protocol}")

    row = _create_row_data(app_name, app_location, app_type, url, related_domains, destination_ip, destination_port, protocol)
    rows.append((app_key, user, row))

def process_logs(logs, nsconf_parser, matched_regex):
    """Turn parsed log entries into a de-duplicated list of application rows.

    Args:
        logs: Log entry dictionaries; a list or any other iterable
            (including a generator).
        nsconf_parser: Parsed configuration, used for DNS suffixes, VPN
            URLs and application matching.
        matched_regex: Identifier of the log regex that matched, used when
            parsing timestamps.

    Returns:
        A tuple (rows, app_name_map): rows holds one row dictionary per
        unique application key (first occurrence wins) and app_name_map
        is the mapping filled in while processing.
    """
    logger.info("Starting process_logs function.")
    dns_suffixes = nsconf_parser.get_dns_suffixes()
    vpn_urls = nsconf_parser.get_vpn_urls()

    # Materialize before calling len(): a generator has no length, so
    # counting the raw input would raise TypeError.
    logs_list = logs if isinstance(logs, list) else list(logs)
    total_logs = len(logs_list)

    rows = []
    app_name_map = {}
    logger.info(f"Processing {total_logs} logs.")
    print(f"Processing {total_logs} log entries to generate applications...")

    for log in progress_bar(logs_list, desc="Analyzing log entries", unit="entries", leave=False, mininterval=5.0):
        _process_single_log(log, matched_regex, dns_suffixes, vpn_urls, nsconf_parser, app_name_map, rows)

    print(f"\nCompleted processing {total_logs} log entries!")

    # Keep only the first row seen for each application key.
    final_rows = {}
    for app_key, _user, row in rows:
        final_rows.setdefault(app_key, row)

    print(f"Log analysis complete! Generated {len(final_rows)} unique applications")
    logger.info(f"process_logs completed. Total final rows: {len(final_rows)}")
    return list(final_rows.values()), app_name_map

def _create_base_row():
    """Return a fresh row skeleton: empty text columns and zeroed counters."""
    text_columns = (
        CSVColumns.ROUTING_TYPE,
        CSVColumns.RESOURCE_LOCATION,
        CSVColumns.DESCRIPTION,
        CSVColumns.CATEGORY,
    )
    counter_columns = (
        CSVColumns.UNIQUE_USERS,
        CSVColumns.TOTAL_VISITS,
        CSVColumns.UNIQUE_USERS_PER_APP,
    )
    skeleton = dict.fromkeys(text_columns, "")
    skeleton.update(dict.fromkeys(counter_columns, 0))
    return skeleton

def _process_intranet_app(app, rows):
    """Append a TCP/UDP row for one configured intranet application.

    The destination is treated as internal when it is a private IP, a
    valid FQDN or a valid hostname; for an "ip-ip" range the first address
    is the one tested. Internal destinations keep their own app name and
    are marked INSIDE. Anything else is filed under "uncategorized_apps",
    marked OUTSIDE, and logged.
    """
    app_name = app.name
    dest = app.dest
    port = app.port
    protocol = app.proto

    # For an IP range, classify by its first address.
    probe = dest
    if '-' in dest and all(is_valid_ip(part.strip()) for part in dest.split('-')):
        range_start, _ = dest.split('-')
        probe = range_start.strip()

    destination_port_protocol = f"{dest}:{port}:{protocol}"
    internal = is_private_rfc1918(probe) or is_valid_fqdn(probe) or is_valid_hostname(probe)

    row = {
        **_create_base_row(),
        CSVColumns.APP_NAME: app_name if internal else "uncategorized_apps",
        CSVColumns.APP_LOCATION: AppLocation.INSIDE.value if internal else AppLocation.OUTSIDE.value,
        CSVColumns.APP_TYPE: AppType.TCP_UDP.value,
        CSVColumns.URL: "",
        CSVColumns.RELATED_DOMAINS: "",
        CSVColumns.DESTINATION_PORT_PROTOCOL: destination_port_protocol,
    }
    if not internal:
        logger.info(f"Adding under uncategorized apps {app_name} {destination_port_protocol} as it is a public IP configured in ns.conf")

    rows.append(row)

def _process_vpn_urls(vpn_urls, rows):
    """Process VPN URLs and create corresponding rows."""
    for name, url in vpn_urls:
        protocol = get_vpn_url_proto_match(url)
        base_row = _create_base_row()
        
        if protocol == vpn_url_proto.RDP.value or protocol == vpn_url_proto.FTP.value:
            ports = [3389] if protocol == vpn_url_proto.RDP.value else [20, 21]
            for port in ports:
                row = {
                    **base_row,
                    CSVColumns.APP_NAME: name,
                    CSVColumns.APP_LOCATION: AppLocation.INSIDE.value,
                    CSVColumns.APP_TYPE: AppType.TCP_UDP.value,
                    CSVColumns.URL: "",
                    CSVColumns.RELATED_DOMAINS: "",
                    CSVColumns.DESTINATION_PORT_PROTOCOL: f"{urlparse(url).hostname}:{port}:{Protocol.TCP.value}",
                }
                rows.append(row)
        else:
            row = {
                **base_row,
                CSVColumns.APP_NAME: name,
                CSVColumns.APP_LOCATION: AppLocation.INSIDE.value,
                CSVColumns.APP_TYPE: AppType.HTTP_HTTPS.value,
                CSVColumns.URL: url,
                CSVColumns.RELATED_DOMAINS: f"*.{urlparse(url).hostname}",
                CSVColumns.DESTINATION_PORT_PROTOCOL: "",
            }
            rows.append(row)

def generate_conf_only_rows(nsconf_parser):
    """Build rows straight from the configuration, without any log data.

    Rows for the active intranet applications come first, followed by the
    rows derived from the VPN URLs.
    """
    conf_rows = []

    for intranet_app in nsconf_parser.get_active_intranet_apps():
        _process_intranet_app(intranet_app, conf_rows)

    vpn_urls = nsconf_parser.get_vpn_urls()
    _process_vpn_urls(vpn_urls, conf_rows)

    return conf_rows

def _parse_nsconf():
    """Parse ns.conf and return (parser, value returned by parse()).

    Parse failures are logged, printed and re-raised.

    NOTE: a second `def _parse_nsconf` further down this module rebinds
    the name, so as the file stands this version is not the one that
    runs.
    """
    logger.info("Parsing ns.conf...")
    print("\nParsing NetScaler configuration...")
    nsconf_parser = NSConfParser(NSCONF_PATH)
    try:
        parsed_vservers = nsconf_parser.parse()
    except Exception as e:
        logger.error(f"Error parsing ns.conf: {e}")
        print(f"Error parsing ns.conf: {e}")
        raise e
    return nsconf_parser, parsed_vservers

def _extract_log_data():
    """Run extract_logs() and return its (logs, matched_regex) pair.

    Failures are logged, printed and re-raised.

    NOTE: a second `def _extract_log_data` further down this module
    rebinds the name, so as the file stands this version is not the one
    that runs.
    """
    print("\nExtracting log data...")
    logger.info("Extracting logs...")
    try:
        extracted_logs, regex_id = extract_logs()
    except Exception as e:
        logger.error(f"Error extracting logs: {e}")
        print(f"Error extracting logs: {e}")
        raise e
    return extracted_logs, regex_id

def _get_value_for_stats(row):
    """Pick the row value that identifies it in the statistics tables.

    HTTP/HTTPS and SaaS rows are identified by their URL; every other row
    by its destination/port/protocol string.
    """
    url_backed_types = (AppType.HTTP_HTTPS.value, AppType.SAAS.value)
    if row[CSVColumns.APP_TYPE] in url_backed_types:
        column = CSVColumns.URL
    else:
        column = CSVColumns.DESTINATION_PORT_PROTOCOL
    return row[column]

def _update_row_stats(row):
    """Fill in the user and visit counters of a row, in place.

    The per-app unique-user count is always refreshed from
    app_unique_users. Unique users and total visits are looked up under
    "<app name>_<url or destination>" and only written while the row
    still holds 0 for them.
    """
    app_name = row[CSVColumns.APP_NAME]
    row[CSVColumns.UNIQUE_USERS_PER_APP] = len(app_unique_users.get(app_name, set()))

    stats_key = app_name + "_" + _get_value_for_stats(row)

    if row[CSVColumns.UNIQUE_USERS] == 0:
        row[CSVColumns.UNIQUE_USERS] = len(unique_users.get(stats_key, set()))
    if row[CSVColumns.TOTAL_VISITS] == 0:
        row[CSVColumns.TOTAL_VISITS] = total_visits.get(stats_key, 0)

def _merge_conf_only_rows(grouped_rows, conf_only_rows):
    """Append to grouped_rows the configuration rows it does not yet hold.

    Rows are compared on (app name, app type, URL, destination/port/
    protocol), using the keys present before merging starts. Each appended
    row has its statistics filled in first.

    NOTE: a second `def _merge_conf_only_rows` further down this module
    rebinds the name, so as the file stands this version is not the one
    that runs.
    """
    def _identity(entry):
        return (entry[CSVColumns.APP_NAME], entry[CSVColumns.APP_TYPE],
                entry[CSVColumns.URL], entry[CSVColumns.DESTINATION_PORT_PROTOCOL])

    already_present = {_identity(entry) for entry in grouped_rows}

    for candidate in conf_only_rows:
        if _identity(candidate) in already_present:
            continue
        _update_row_stats(candidate)
        grouped_rows.append(candidate)

def _update_all_stats(grouped_rows, lb_cs_vservers):
    """Fill in statistics for every row and fold the vserver rows in.

    Rows already in grouped_rows are updated first, with a progress bar;
    each LB/CS vserver row is then updated and appended to grouped_rows.

    NOTE: a second `def _update_all_stats` further down this module
    rebinds the name, so as the file stands this version is not the one
    that runs.
    """
    print("\nFinalizing application statistics...")

    # Pass 1: rows produced from logs and configuration.
    for app_row in progress_bar(grouped_rows, desc="Calculating user stats", unit="apps", leave=False):
        _update_row_stats(app_row)

    # Pass 2: vserver rows join the output once their stats are set.
    for vserver_row in lb_cs_vservers:
        _update_row_stats(vserver_row)
        grouped_rows.append(vserver_row)

def _write_csv_file(grouped_rows):
    """Write the application rows to OUTPUT_CSV with a header line.

    Args:
        grouped_rows: Row dictionaries keyed by the CSV_COLUMNS names.

    Raises:
        Exception: Any error raised while opening or writing the file is
            logged, printed and re-raised unchanged.
    """
    logger.info(f"Writing {len(grouped_rows)} rows to CSV...")
    print(f"\nWriting {len(grouped_rows)} applications to CSV...")

    try:
        # Write UTF-8 explicitly: the file is read back as UTF-8 later in
        # this module, and the platform default encoding may differ
        # (e.g. on Windows), which breaks non-ASCII values.
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=CSV_COLUMNS)
            writer.writeheader()
            writer.writerows(grouped_rows)
    except Exception as ex:
        logger.error(f"Error writing CSV file: {ex}")
        print(f"Error writing CSV file: {ex}")
        raise

def _parse_nsconf():
    """Parse the ns.conf file at NSCONF_PATH.

    Returns (parser, lb_cs_vservers), where the second item is read from
    the parser's lb_cs_vservers attribute after parsing. Exceptions from
    parsing propagate to the caller.
    """
    print("Parsing NetScaler configuration...")
    logger.info(f"Parsing ns.conf file: {NSCONF_PATH}")

    parser = NSConfParser(NSCONF_PATH)
    parser.parse()

    vserver_rows = parser.lb_cs_vservers
    logger.info(f"Found {len(vserver_rows)} lb/cs vservers")
    return parser, vserver_rows

def _extract_log_data():
    """Run extract_logs() and return its (logs, matched_regex) pair.

    Raises:
        Exception: Any error from extract_logs() is logged, printed and
            re-raised unchanged.
    """
    print("Extracting log data...")
    try:
        logs, matched_regex = extract_logs()
    except Exception as e:
        logger.error(f"Error extracting logs: {e}")
        print(f"Error extracting logs: {e}")
        # No fallback values are assigned here: the exception is re-raised,
        # so nothing assigned in this handler could ever be returned.
        raise
    return logs, matched_regex

def _update_all_stats(grouped_rows, lb_cs_vservers):
    """Fill in statistics for every row and fold the vserver rows in.

    Args:
        grouped_rows: Application rows; updated in place and extended with
            the vserver rows.
        lb_cs_vservers: LB/CS vserver rows to update and append.
    """
    print("\nFinalizing application statistics...")

    # Per-row statistics come from the module-level _update_row_stats
    # helper rather than a private copy of the same logic.
    for row in progress_bar(grouped_rows, desc="Calculating user stats", unit="apps", leave=False):
        _update_row_stats(row)

    # Vserver rows join the output once their stats are set.
    for row in lb_cs_vservers:
        _update_row_stats(row)
        grouped_rows.append(row)

def _merge_conf_only_rows(grouped_rows, conf_only_rows):
    """Append to grouped_rows the configuration rows it does not yet hold.

    Rows are compared on (app name, app type, URL, destination/port/
    protocol), using the keys present before merging starts.

    Args:
        grouped_rows: Existing application rows; extended in place.
        conf_only_rows: Rows generated from the configuration alone.
    """
    def _row_key(row):
        return (row[CSVColumns.APP_NAME], row[CSVColumns.APP_TYPE],
                row[CSVColumns.URL], row[CSVColumns.DESTINATION_PORT_PROTOCOL])

    existing_keys = {_row_key(row) for row in grouped_rows}

    for row in conf_only_rows:
        if _row_key(row) in existing_keys:
            continue
        # Per-row statistics come from the module-level _update_row_stats
        # helper rather than an inlined copy of the same logic.
        _update_row_stats(row)
        grouped_rows.append(row)

def _count_unique_apps():
    """Collect the distinct app names found in OUTPUT_CSV.

    Names containing "_saas" are left out. Read errors are logged and
    printed, and whatever was collected before the error is returned.
    """
    collected = set()
    try:
        with open(OUTPUT_CSV, "r", encoding="utf-8") as csvfile:
            for record in csv.DictReader(csvfile):
                app_name = record[CSVColumns.APP_NAME]
                if "_saas" in app_name:
                    continue
                collected.add(app_name)
    except Exception as ex:
        logger.error(f"Error reading CSV file: {ex}")
        print(f"Error reading CSV file: {ex}")

    return collected

def main():
    """Run the whole analysis: parse config, process logs, write the CSV.

    Any exception is logged with its traceback and replaced by a short
    console message pointing at LOG_FILE; it is not re-raised.
    """
    try:
        print("Starting VPN to ZTNA Migration...")
        print("=" * 50)

        # Inputs: configuration first, then the logs.
        parser, vserver_rows = _parse_nsconf()
        logs, matched_regex = _extract_log_data()

        # Applications observed in the logs.
        print(f"\nAnalyzing {len(logs)} log entries...")
        logger.info(f"Processing {len(logs)} log entries...")
        app_rows, name_map = process_logs(logs, parser, matched_regex)

        # Applications that exist only in the configuration.
        _merge_conf_only_rows(app_rows, generate_conf_only_rows(parser))

        # CS vserver details are imported before statistics are finalized.
        parser.import_csvs_details()
        _update_all_stats(app_rows, vserver_rows)

        _write_csv_file(app_rows)

        # Summary, derived from the file that was just written.
        unique_names = _count_unique_apps()
        logger.info(f"Total unique apps: {len(unique_names)}")
        logger.debug(f"List of unique AppNames: {sorted(unique_names)}")
        logger.debug("AppName Map:")
        for map_key, map_value in name_map.items():
            logger.debug(f"{map_key}: {map_value}")

        logger.info(f"\nCSV file '{OUTPUT_CSV}' created successfully.")
        print(f"\nAnalysis complete! CSV file '{OUTPUT_CSV}' created successfully.")

    except Exception as ex:
        logger.error(f"Error in main: {ex}", exc_info=True)
        print(f"Error generating csv. Check {LOG_FILE} for details.")

# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
