#!/usr/bin/env python3
"""
Azure APIM Key Scanner
Detects Azure API Management endpoints and tests for exposed subscription keys.
"""

import re
import sys
import json
import argparse
import concurrent.futures
from typing import List, Dict, Optional, Set
from urllib.parse import urljoin, urlparse

try:
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry
except ImportError:
    print("Error: 'requests' library required. Install with: pip install requests")
    sys.exit(1)


class APIMScanner:
    """Scanner for Azure APIM endpoints and subscription keys.

    The scanner extracts candidate gateway URLs and key-shaped strings from
    text, probes URLs for APIM characteristics, and checks whether candidate
    keys are accepted by endpoints that demand a subscription key.

    Args:
        timeout: Per-request timeout in seconds, passed to ``requests``.
        threads: Stored on the instance; not used by any method in this class.
        verbose: When true, ``log`` prints progress messages to stdout.
    """

    # Common APIM URL patterns (applied case-insensitively).
    APIM_PATTERNS = [
        r'https?://[a-zA-Z0-9\-]+\.azure-api\.net/?',
        r'https?://[a-zA-Z0-9\-]+\.management\.azure-api\.net/?',
        # Scheme-less host. The lookbehinds stop this from matching the tail of
        # a longer hostname ("myapim-x" -> "apim-x") or re-matching a host that
        # the scheme-qualified patterns above already captured.
        r'(?<![a-zA-Z0-9\-.])(?<!://)apim[a-zA-Z0-9\-]*\.azure-api\.net',
    ]

    # APIM subscription key patterns
    KEY_PATTERNS = [
        r'[0-9a-f]{32}',  # MD5-like hex (32 chars)
        r'[0-9a-zA-Z]{32,64}',  # Alphanumeric keys
        r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',  # GUID format
    ]

    # Common APIM header names
    APIM_HEADERS = [
        'Ocp-Apim-Subscription-Key',
        'Ocp-Apim-Subscription-Key-Primary',
        'Ocp-Apim-Subscription-Key-Secondary',
        'X-Api-Key',
        'Api-Key',
        'ApiKey',
    ]

    # Response headers treated as evidence of an APIM gateway. Generic server
    # headers such as X-Powered-By / X-AspNet-Version are deliberately absent:
    # they are emitted by many unrelated stacks and caused false positives.
    APIM_INDICATOR_HEADERS = [
        'Ocp-Apim-Trace-Location',
    ]

    # Test endpoints commonly found in APIM
    TEST_ENDPOINTS = [
        '/api/health',
        '/health',
        '/status',
        '/api/status',
        '/api/v1/health',
        '/api/v2/health',
        '/swagger',
        '/swagger/index.html',
        '/api-docs',
        '/openapi.json',
        '/',
    ]

    # Placeholder fragments; a candidate built purely from repetitions of one
    # of these (e.g. 'deadbeef' * 4) is rejected as a dummy value.
    _PLACEHOLDER_KEYS = (
        'ffffffff',
        '00000000',
        '12345678',
        'deadbeef',
    )

    def __init__(self, timeout=10, threads=5, verbose=False):
        self.timeout = timeout
        self.threads = threads
        self.verbose = verbose
        self.session = self._create_session()

    def _create_session(self):
        """Create a requests session that retries 500/502/503/504 responses."""
        session = requests.Session()
        retry = Retry(
            total=3,
            backoff_factor=0.3,
            status_forcelist=[500, 502, 503, 504]
        )
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def log(self, message, level="INFO"):
        """Print ``message`` prefixed with ``level`` when verbose is enabled."""
        if self.verbose:
            print(f"[{level}] {message}")

    @staticmethod
    def _mask_key(key: str) -> str:
        """Return ``key`` masked for display.

        Keys of 12 characters or fewer are fully starred out, because showing
        the first 8 and last 4 characters would reveal the whole value.
        """
        if len(key) <= 12:
            return '*' * len(key)
        return key[:8] + '...' + key[-4:]

    @staticmethod
    def _normalize_url(match: str) -> str:
        """Canonicalize a matched gateway URL.

        The patterns only capture scheme + host (+ optional slash), all of
        which are case-insensitive, so lowercasing is safe. Scheme-less hosts
        get ``https://`` so they can be requested.
        """
        url = match.strip().rstrip('/').lower()
        if '://' not in url:
            url = 'https://' + url
        return url

    def extract_apim_urls(self, text: str) -> Set[str]:
        """Extract APIM gateway URLs from ``text``.

        Returns:
            A set of normalized URLs (lowercase, with scheme, no trailing
            slash), so the same host is never reported twice.
        """
        urls = set()
        for pattern in self.APIM_PATTERNS:
            for match in re.findall(pattern, text, re.IGNORECASE):
                urls.add(self._normalize_url(match))
        return urls

    def extract_potential_keys(self, text: str) -> Set[str]:
        """Extract key-shaped strings from ``text``, minus obvious dummies."""
        keys = set()
        for pattern in self.KEY_PATTERNS:
            keys.update(
                candidate
                for candidate in re.findall(pattern, text)
                if self._is_valid_key_format(candidate)
            )
        return keys

    def _is_valid_key_format(self, key: str) -> bool:
        """Return False for candidates that are clearly not real keys.

        A candidate is rejected when, ignoring hyphens, it consists solely of
        repetitions of a known placeholder fragment, or when it uses fewer
        than five distinct characters.
        """
        lowered = key.lower()
        compact = lowered.replace('-', '')
        for placeholder in self._PLACEHOLDER_KEYS:
            # Removing every occurrence leaves nothing only when the candidate
            # is the placeholder repeated end to end.
            if not compact.replace(placeholder, ''):
                return False

        # Check for sufficient variety (not all the same few chars)
        return len(set(lowered)) >= 5

    def detect_apim_endpoint(self, url: str) -> Dict:
        """Probe ``url`` with a GET and look for signs of an APIM gateway.

        Returns:
            A dict with keys ``url``, ``is_apim``, ``status_code``,
            ``headers``, ``requires_key`` and ``error``. ``error`` holds the
            exception text when the request fails; other fields then keep
            their defaults.
        """
        result = {
            'url': url,
            'is_apim': False,
            'status_code': None,
            'headers': {},
            'requires_key': False,
            'error': None
        }

        try:
            self.log(f"Testing endpoint: {url}")
            response = self.session.get(url, timeout=self.timeout, allow_redirects=True)
            result['status_code'] = response.status_code
            result['headers'] = dict(response.headers)

            # response.headers is a case-insensitive mapping, so a plain
            # membership test covers any header casing.
            for header in self.APIM_INDICATOR_HEADERS:
                if header in response.headers:
                    result['is_apim'] = True
                    self.log(f"APIM indicator found: {header}", "SUCCESS")

            # NOTE(review): APIM's subscription-key rejection is expected to
            # carry a "WWW-Authenticate: AzureApiManagementKey ..." challenge;
            # confirm against a live gateway.
            challenge = response.headers.get('WWW-Authenticate', '')
            if 'azureapimanagementkey' in challenge.lower():
                result['is_apim'] = True
                result['requires_key'] = True
                self.log("APIM indicator found: WWW-Authenticate", "SUCCESS")

            body_lower = response.text.lower()

            # Check response body for APIM errors
            if response.status_code in (401, 403):
                if any(x in body_lower for x in ('subscription', 'api-key', 'ocp-apim')):
                    result['is_apim'] = True
                    result['requires_key'] = True
                    self.log(f"APIM authentication required (status {response.status_code})", "WARN")

            # Check for APIM in response
            if 'azure-api.net' in body_lower:
                result['is_apim'] = True

        except requests.exceptions.RequestException as e:
            result['error'] = str(e)
            self.log(f"Error testing {url}: {e}", "ERROR")

        return result

    def test_key(self, url: str, key: str, header: str = 'Ocp-Apim-Subscription-Key') -> Dict:
        """Send ``key`` in ``header`` to ``url`` and classify the response.

        2xx, and 400/404/405/429, are treated as "key accepted"; 401/403 as
        rejected. Any other status leaves ``valid`` False.

        Returns:
            A dict with keys ``url``, ``key`` (masked), ``header``, ``valid``,
            ``status_code``, ``response_time``, ``error`` and, for the 4xx
            "accepted" case, ``note``.
        """
        import time  # local import: the module's import block has no `time`

        result = {
            'url': url,
            'key': self._mask_key(key),  # Masked for logging
            'header': header,
            'valid': False,
            'status_code': None,
            'response_time': None,
            'error': None
        }

        try:
            self.log(f"Testing key with {header} header")
            # Monotonic clock: immune to wall-clock adjustments mid-request.
            start_time = time.monotonic()

            response = self.session.get(url, headers={header: key}, timeout=self.timeout)

            result['status_code'] = response.status_code
            result['response_time'] = round(time.monotonic() - start_time, 2)

            # Key is valid if we get 2xx or specific 4xx that aren't auth errors
            if response.status_code < 300:
                result['valid'] = True
                self.log(f"✓ Key works! Status: {response.status_code}", "SUCCESS")
            elif response.status_code in (400, 404, 405, 429):
                # These suggest auth passed but request issue
                result['valid'] = True
                result['note'] = 'Key valid but endpoint/method issue'
                self.log(f"✓ Key valid (status {response.status_code})", "SUCCESS")
            elif response.status_code in (401, 403):
                self.log(f"✗ Key invalid (status {response.status_code})", "WARN")

        except requests.exceptions.RequestException as e:
            result['error'] = str(e)
            self.log(f"Error testing key: {e}", "ERROR")

        return result

    def _build_test_urls(self, base_url: str) -> List[str]:
        """Return ``base_url`` plus the common endpoints, without duplicates.

        URLs that differ only by an empty versus "/" path address the same
        resource, so only the first of them is kept. Order is preserved.
        """
        candidates = [base_url] + [urljoin(base_url, ep) for ep in self.TEST_ENDPOINTS]
        unique = {}
        for url in candidates:
            parts = urlparse(url)
            canonical = parts._replace(path=parts.path or '/').geturl()
            unique.setdefault(canonical, url)
        return list(unique.values())

    def scan_url(self, base_url: str, keys: Optional[List[str]] = None) -> Dict:
        """Probe ``base_url`` and common sub-paths; test ``keys`` where needed.

        Args:
            base_url: Gateway URL. ``https://`` is assumed when no scheme is
                given.
            keys: Candidate subscription keys. Each is tried with every name
                in ``APIM_HEADERS`` against endpoints that require a key.

        Returns:
            A dict with ``base_url``, ``endpoints_tested``, ``valid_keys``
            (full key retained), ``invalid_keys`` (masked) and ``errors``
            (request failures, as ``{'url', 'error'}`` entries).
        """
        if '://' not in base_url:
            # A bare host would make requests fail with a missing-scheme error.
            base_url = 'https://' + base_url

        self.log(f"\n{'='*60}\nScanning: {base_url}\n{'='*60}")

        results = {
            'base_url': base_url,
            'endpoints_tested': [],
            'valid_keys': [],
            'invalid_keys': [],
            'errors': []
        }

        for url in self._build_test_urls(base_url):
            endpoint_result = self.detect_apim_endpoint(url)
            results['endpoints_tested'].append(endpoint_result)
            if endpoint_result.get('error'):
                results['errors'].append({'url': url, 'error': endpoint_result['error']})

            # Only endpoints that demand a key are worth testing keys against.
            if not (endpoint_result.get('requires_key') and keys):
                continue

            for key in keys:
                for header in self.APIM_HEADERS:
                    key_result = self.test_key(url, key, header)
                    if key_result.get('error'):
                        # A transport failure says nothing about the key, so
                        # it is reported as an error rather than as invalid.
                        results['errors'].append({'url': url, 'error': key_result['error']})
                    elif key_result['valid']:
                        results['valid_keys'].append({
                            'key': key,
                            'url': url,
                            'header': header,
                            'result': key_result
                        })
                    else:
                        results['invalid_keys'].append({
                            'key': self._mask_key(key),
                            'url': url,
                            'header': header
                        })

        return results

    def scan_file(self, file_path: str) -> Dict:
        """Scan a file for APIM URLs and keys, then scan each URL found.

        The file is read as UTF-8 with undecodable bytes dropped, so binary
        files can be scanned too.

        Returns:
            ``{'error': ...}`` when the file cannot be read; otherwise a dict
            with ``file``, ``urls_found``, ``keys_found`` (a count) and
            ``scan_results`` (one ``scan_url`` result per URL).
        """
        self.log(f"Scanning file: {file_path}")

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
        except OSError as e:
            return {'error': f"Failed to read file: {e}"}

        # Sorted so that request order and report order are reproducible.
        urls = sorted(self.extract_apim_urls(content))
        keys = sorted(self.extract_potential_keys(content))

        self.log(f"Found {len(urls)} APIM URLs")
        self.log(f"Found {len(keys)} potential keys")

        results = {
            'file': file_path,
            'urls_found': urls,
            'keys_found': len(keys),
            'scan_results': []
        }

        # Scan each URL found
        for url in urls:
            results['scan_results'].append(self.scan_url(url, keys))

        return results

    def scan_github_repo(self, repo_url: str) -> Dict:
        """Placeholder for GitHub repo scanning; always returns an error dict."""
        # This would use GitHub Code Search API
        # Placeholder for future implementation
        return {
            'error': 'GitHub scanning not yet implemented. Use: gh api /search/code'
        }


def _print_scan_section(scan: Dict) -> None:
    """Print the valid keys and detected APIM endpoints of one URL scan."""
    valid_keys = scan.get('valid_keys') or []
    if valid_keys:
        print("\n✓ VALID KEYS FOUND:")
        for item in valid_keys:
            key = item['key']
            # Showing 8 leading + 4 trailing characters of a key that short
            # would reveal all of it, so star it out instead.
            masked = f"{key[:8]}...{key[-4:]}" if len(key) > 12 else '*' * len(key)
            print(f"  Key: {masked}")
            print(f"  URL: {item['url']}")
            print(f"  Header: {item['header']}")
            print(f"  Status: {item['result']['status_code']}")
            print()

    apim_endpoints = [e for e in scan.get('endpoints_tested') or [] if e.get('is_apim')]
    if apim_endpoints:
        print("\n✓ APIM Endpoints Detected:")
        for ep in apim_endpoints:
            print(f"  - {ep['url']} (Status: {ep.get('status_code')})")
            if ep.get('requires_key'):
                print("    [!] Requires subscription key")

    if not valid_keys and not apim_endpoints:
        print("\n  No APIM endpoints or valid keys detected")


def print_results(results: Dict):
    """Pretty print scan results.

    Three result shapes are handled:

    * a file scan (has a ``'file'`` key) - summary plus one section per URL;
    * a single URL scan (has a ``'base_url'`` key) - one section;
    * a failure (only an ``'error'`` key) - the error message.
    """
    print("\n" + "="*80)
    print("SCAN RESULTS")
    print("="*80)

    if 'file' in results:
        print(f"\nFile: {results['file']}")
        print(f"APIM URLs Found: {len(results['urls_found'])}")
        for url in results['urls_found']:
            print(f"  - {url}")

        print(f"\nPotential Keys Found: {results['keys_found']}")

        for scan in results.get('scan_results', []):
            print(f"\n{'-'*80}")
            print(f"URL: {scan['base_url']}")
            _print_scan_section(scan)
    elif 'base_url' in results:
        # Direct URL scan: previously nothing but the banner was shown.
        print(f"\nURL: {results['base_url']}")
        _print_scan_section(results)
    elif 'error' in results:
        print(f"\nError: {results['error']}")


def main():
    """Command-line entry point.

    Parses arguments, runs a file scan (``-f``) or a URL scan (``-u``), prints
    the results and optionally saves them as JSON. Exits with status 1 when no
    target is given, when an input file cannot be read, when the scan reports
    an error, or when the results cannot be saved.
    """
    parser = argparse.ArgumentParser(
        description='Azure APIM Key Scanner - Detect and test APIM subscription keys',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan a file for APIM URLs and keys
  python apim_key_scanner.py -f app.js
  
  # Test specific URL with key
  python apim_key_scanner.py -u https://api.example.azure-api.net -k your-key-here
  
  # Scan URL without key (detection only)
  python apim_key_scanner.py -u https://api.example.azure-api.net
  
  # Scan binary file
  python apim_key_scanner.py -f libapp.so --verbose
        """
    )

    parser.add_argument('-f', '--file', help='File to scan for APIM URLs and keys')
    parser.add_argument('-u', '--url', help='APIM URL to test')
    parser.add_argument('-k', '--key', help='Subscription key to test')
    parser.add_argument('--keys-file', help='File containing keys to test (one per line)')
    parser.add_argument('-t', '--timeout', type=int, default=10, help='Request timeout (default: 10s)')
    parser.add_argument('--threads', type=int, default=5, help='Number of threads (default: 5)')
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
    parser.add_argument('-o', '--output', help='Output results to JSON file')

    args = parser.parse_args()

    if not args.file and not args.url:
        parser.print_help()
        sys.exit(1)

    # A zero/negative timeout is rejected by the HTTP stack with an exception
    # the scanner does not catch; fail early with a usage error instead.
    if args.timeout <= 0:
        parser.error('--timeout must be a positive number of seconds')

    scanner = APIMScanner(timeout=args.timeout, threads=args.threads, verbose=args.verbose)

    # Load keys if provided
    keys = []
    if args.key:
        keys.append(args.key)
    if args.keys_file:
        try:
            with open(args.keys_file, 'r', encoding='utf-8') as f:
                keys.extend(line.strip() for line in f if line.strip())
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading keys file: {e}", file=sys.stderr)
            sys.exit(1)
    # Drop repeated keys (order preserved) so none is tested twice.
    keys = list(dict.fromkeys(keys))

    # Perform scan
    results = None
    if args.file:
        if keys:
            # scan_file only tests keys it extracts from the file itself.
            print("[!] -k/--keys-file are only used with -u; "
                  "file scans test the keys found in the file", file=sys.stderr)
        if args.url:
            print("[!] -u is ignored when -f is given", file=sys.stderr)
        results = scanner.scan_file(args.file)
    elif args.url:
        results = scanner.scan_url(args.url, keys if keys else None)

    if not results:
        return

    # A bare error dict (e.g. unreadable input file) is a failure, not a report.
    if 'error' in results and 'file' not in results and 'base_url' not in results:
        print(f"Error: {results['error']}", file=sys.stderr)
        sys.exit(1)

    # Output results
    print_results(results)

    if args.output:
        # NOTE: the saved JSON includes any valid keys unmasked.
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving results: {e}", file=sys.stderr)
            sys.exit(1)
        print(f"\n[+] Results saved to: {args.output}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
