#!/usr/bin/env python3
"""
APIM Web Scanner - Search for exposed Azure APIM keys on the web
Uses multiple sources: Google dorking, GitHub, Pastebin, etc.
"""

import re
import sys
import json
import time
import argparse
from typing import List, Dict, Set
from urllib.parse import quote, urljoin

try:
    import requests
except ImportError:
    print("Error: 'requests' library required. Install with: pip install requests")
    sys.exit(1)


class APIMWebScanner:
    """Scanner for Azure API Management (APIM) keys exposed on the internet.

    The scanner can query the GitHub code-search API, probe a single site for
    well-known configuration file names, and render a plain-text report that
    also lists search queries meant to be run manually.

    Attributes:
        github_token: Personal access token used for GitHub API calls, or
            ``None`` to fall back to printing manual search URLs.
        verbose: When true, :meth:`log` prints diagnostic messages.
        session: Shared ``requests.Session`` used for every HTTP request.
    """

    # Google dorks for finding APIM keys
    GOOGLE_DORKS = [
        'site:azure-api.net "Ocp-Apim-Subscription-Key"',
        '"Ocp-Apim-Subscription-Key" filetype:js',
        '"Ocp-Apim-Subscription-Key" filetype:json',
        'azure-api.net inurl:config OR inurl:settings',
        '"azure-api.net" AND ("subscription" OR "api-key")',
        'site:github.com "Ocp-Apim-Subscription-Key"',
        'site:pastebin.com "azure-api.net"',
        'site:gitlab.com "Ocp-Apim-Subscription-Key"',
    ]

    # GitHub code search queries
    GITHUB_QUERIES = [
        'Ocp-Apim-Subscription-Key',
        'azure-api.net',
        'apim subscription key',
        'OcpApimSubscriptionKey',
    ]

    # Common exposed file patterns
    EXPOSED_FILES = [
        'config.js',
        'config.json',
        'appsettings.json',
        'settings.json',
        '.env',
        'environment.js',
        'constants.js',
        'api-config.js',
    ]

    # Directories probed for every entry of EXPOSED_FILES ('' is the site root).
    _PROBE_DIRS = ('', 'js/', 'config/', 'assets/', 'static/')

    # Lower-case substrings that mark a fetched file as APIM-related.
    _CONTENT_MARKERS = ('apim', 'subscription-key', 'azure-api')

    # Compiled once at class creation instead of on every analysed file.
    _KEY_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'Ocp-Apim-Subscription-Key["\s:=]+([a-zA-Z0-9\-]{30,})',
            r'subscription[-_]?key["\s:=]+([a-zA-Z0-9\-]{30,})',
            r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
            r'https://([a-zA-Z0-9\-]+)\.azure-api\.net',
        )
    )

    def __init__(self, github_token=None, verbose=False):
        """Store the configuration and open a reusable HTTP session."""
        self.github_token = github_token
        self.verbose = verbose
        self.session = requests.Session()

    def log(self, message, level="INFO"):
        """Print ``message`` tagged with ``level``, but only in verbose mode."""
        if self.verbose:
            print(f"[{level}] {message}")

    def search_google(self, dork: str, num_results: int = 10) -> List[Dict]:
        """Print a Google dork and its search URL for manual use.

        No request is sent to Google; automating this would need a service
        such as SerpAPI.

        Args:
            dork: The search expression to show.
            num_results: Accepted for interface compatibility; unused.

        Returns:
            Always an empty list, because the search is a manual step.
        """
        print("\n[GOOGLE DORK] Use this query:")
        print(f"  {dork}")
        print(f"  https://www.google.com/search?q={quote(dork)}")

        return []  # Manual process

    def search_github(self, query: str, max_results: int = 30) -> List[Dict]:
        """Run a GitHub code search and return the raw result items.

        Without a token the method prints the equivalent manual search URL
        and returns an empty list.

        Args:
            query: Code-search expression.
            max_results: Requested page size; clamped to the 1-100 range.

        Returns:
            The ``items`` list of the API response, or an empty list when the
            request fails or is rejected.
        """
        if not self.github_token:
            print("\n[!] GitHub token not provided. Get one at:")
            print("    https://github.com/settings/tokens")
            print("\n[GITHUB SEARCH] Manual query:")
            print(f"  {query}")
            print(f"  https://github.com/search?type=code&q={quote(query)}")
            return []

        results = []
        headers = {
            'Authorization': f'token {self.github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }
        params = {
            'q': query,
            # Keep per_page inside 1-100 even for zero or negative input.
            'per_page': max(1, min(max_results, 100))
        }

        try:
            self.log(f"Searching GitHub: {query}")
            response = self.session.get(
                'https://api.github.com/search/code',
                headers=headers,
                params=params,
                timeout=30,
            )

            if response.status_code == 200:
                results = response.json().get('items', [])
                self.log(f"Found {len(results)} results on GitHub")
            elif response.status_code in (403, 429):
                # Rate limiting can be signalled with either status code.
                print("[!] GitHub API rate limit exceeded")
            else:
                print(f"[!] GitHub API error: {response.status_code}")

        except (requests.RequestException, ValueError) as e:
            # Reported unconditionally, like the HTTP error branches above:
            # a hidden failure would look the same as "nothing found".
            print(f"[!] GitHub request failed: {e}")

        return results

    @classmethod
    def _find_key_candidates(cls, content: str) -> List[str]:
        """Return the unique pattern matches in ``content``, in first-seen order."""
        seen = set()
        candidates = []
        for pattern in cls._KEY_PATTERNS:
            for match in pattern.findall(content):
                # Patterns overlap (a header-style key matches two of them),
                # so identical values are collapsed to a single entry.
                if match not in seen:
                    seen.add(match)
                    candidates.append(match)
        return candidates

    def extract_keys_from_github_file(self, file_info: Dict) -> List[Dict]:
        """Download a GitHub search hit and extract key-like strings from it.

        Args:
            file_info: One item of a code-search response. ``html_url`` is
                required; ``path`` and ``repository.full_name`` fall back to
                ``'N/A'`` when absent.

        Returns:
            One dict per unique match with the keys ``key``, ``source``,
            ``repo`` and ``path``. Empty when the file cannot be fetched.
        """
        keys_found = []

        html_url = file_info.get('html_url', '')
        if not html_url:
            return keys_found

        # Get raw file content
        raw_url = html_url.replace('/blob/', '/raw/')
        path = file_info.get('path', 'N/A')
        repo = (file_info.get('repository') or {}).get('full_name', 'N/A')

        try:
            self.log(f"Analyzing: {path}")
            response = self.session.get(raw_url, timeout=15)
        except requests.RequestException as e:
            self.log(f"Error extracting from file: {e}", "ERROR")
            return keys_found

        if response.status_code == 200:
            for candidate in self._find_key_candidates(response.text):
                keys_found.append({
                    'key': candidate,
                    'source': html_url,
                    'repo': repo,
                    'path': path
                })

        return keys_found

    @staticmethod
    def _normalize_base_url(base_url: str) -> str:
        """Return ``base_url`` with a scheme and without a trailing slash."""
        cleaned = (base_url or '').strip()
        if not cleaned:
            return ''
        if '://' not in cleaned:
            # A bare host name would make every request fail with a
            # missing-scheme error, so default to HTTPS.
            cleaned = f"https://{cleaned}"
        return cleaned.rstrip('/')

    @classmethod
    def _candidate_urls(cls, base: str) -> List[str]:
        """Return every URL to probe under ``base``, grouped by file name."""
        return [
            f"{base}/{directory}{file_pattern}"
            for file_pattern in cls.EXPOSED_FILES
            for directory in cls._PROBE_DIRS
        ]

    def search_common_endpoints(self, base_url: str) -> List[Dict]:
        """Probe a site for exposed configuration files mentioning APIM.

        Args:
            base_url: Site to probe. ``https://`` is assumed when no scheme
                is given.

        Returns:
            A list of dicts with the keys ``url`` and ``content``, one for
            each file that returned HTTP 200 and contains an APIM marker.
        """
        base = self._normalize_base_url(base_url)
        if not base:
            return []

        exposed_files = []

        for url in self._candidate_urls(base):
            try:
                response = self.session.get(url, timeout=5)
            except requests.RequestException as e:
                # Best effort: one unreachable URL must not stop the scan.
                self.log(f"Request failed for {url}: {e}", "ERROR")
                continue

            if response.status_code != 200:
                continue

            # Check if it contains APIM-related content
            body = response.text
            lowered = body.lower()
            if any(marker in lowered for marker in self._CONTENT_MARKERS):
                self.log(f"Found exposed file: {url}", "SUCCESS")
                exposed_files.append({
                    'url': url,
                    'content': body
                })

        return exposed_files

    @staticmethod
    def _mask_key(key: str) -> str:
        """Return a redacted form of ``key`` suitable for the report.

        Up to 8 leading and 4 trailing characters are shown, scaled down for
        short values so that most of the value always stays hidden.
        """
        head = min(8, len(key) // 4)
        tail = min(4, len(key) // 8)
        # Slice from an explicit index: key[-0:] would return the whole key.
        return f"{key[:head]}...{key[len(key) - tail:]}"

    def generate_report(self, findings: Dict) -> str:
        """Render the findings as a plain-text report.

        Args:
            findings: Mapping that may contain ``google_dorks`` (list of
                str), ``github_results`` (list of dicts as produced by
                :meth:`extract_keys_from_github_file`) and ``exposed_files``
                (list of dicts with a ``url`` key). Missing or empty
                sections are skipped.

        Returns:
            The report as one newline-joined string. Key values are masked.
        """
        report = []
        report.append("="*80)
        report.append("APIM WEB SCANNER REPORT")
        report.append("="*80)
        report.append("")

        # Google dorks
        if findings.get('google_dorks'):
            report.append("\n[GOOGLE DORK QUERIES]")
            report.append("Use these queries manually to find exposed keys:")
            report.append("")
            for dork in findings['google_dorks']:
                report.append(f"  {dork}")
                report.append(f"  https://www.google.com/search?q={quote(dork)}")
                report.append("")

        # GitHub results
        if findings.get('github_results'):
            report.append("\n[GITHUB FINDINGS]")
            report.append(f"Found {len(findings['github_results'])} potential exposures:")
            report.append("")
            for result in findings['github_results']:
                report.append(f"  Repository: {result.get('repo', 'N/A')}")
                report.append(f"  File: {result.get('path', 'N/A')}")
                report.append(f"  URL: {result.get('source', 'N/A')}")
                if result.get('key'):
                    report.append(f"  Key: {self._mask_key(result['key'])}")
                report.append("")

        # Exposed files
        if findings.get('exposed_files'):
            report.append("\n[EXPOSED CONFIGURATION FILES]")
            report.append(f"Found {len(findings['exposed_files'])} exposed files:")
            report.append("")
            for file_info in findings['exposed_files']:
                report.append(f"  URL: {file_info['url']}")
                report.append("")

        # Manual checks
        report.append("\n[MANUAL VERIFICATION STEPS]")
        report.append("")
        report.append("1. Search GitHub Advanced:")
        report.append("   https://github.com/search/advanced")
        report.append("   - Search for: Ocp-Apim-Subscription-Key")
        report.append("   - File extension: .js, .json, .config")
        report.append("")
        report.append("2. Search Shodan:")
        report.append("   https://www.shodan.io")
        report.append("   - Query: azure-api.net")
        report.append("")
        report.append("3. Check Public Buckets:")
        report.append("   - AWS S3: grep-app.com or buckets.grayhatwarfare.com")
        report.append("   - Azure Blobs: Similar services")
        report.append("")
        report.append("4. Search Pastebin/Gists:")
        report.append("   - site:pastebin.com azure-api.net")
        report.append("   - site:gist.github.com Ocp-Apim-Subscription-Key")
        report.append("")

        return "\n".join(report)


def main():
    """Command-line entry point: run the scans, print and optionally save the report.

    With ``--github-token`` the GitHub code-search queries are executed and
    each hit is analysed for key-like strings; without it the equivalent
    manual search URLs are printed. With ``--domain`` the site is probed for
    exposed configuration files. With ``--output`` the text report followed
    by the raw findings as JSON is written to the given file.
    """
    parser = argparse.ArgumentParser(
        description='APIM Web Scanner - Find exposed Azure APIM keys online',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('--github-token', help='GitHub personal access token')
    parser.add_argument('--domain', help='Specific domain to check for exposed config files')
    parser.add_argument('-o', '--output', help='Output report to file')
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')

    args = parser.parse_args()

    scanner = APIMWebScanner(github_token=args.github_token, verbose=args.verbose)

    findings = {
        # Copy so the findings never alias the class-level constant.
        'google_dorks': list(scanner.GOOGLE_DORKS),
        'github_results': [],
        'exposed_files': [],
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
    }

    # Search GitHub
    if args.github_token:
        print("\n[*] Searching GitHub...")
        for query in scanner.GITHUB_QUERIES:
            for result in scanner.search_github(query):
                findings['github_results'].extend(
                    scanner.extract_keys_from_github_file(result)
                )
            time.sleep(2)  # Rate limiting
    else:
        print("\n[!] No GitHub token provided. Showing manual search queries.")
        # Print the queries the message above announces.
        for query in scanner.GITHUB_QUERIES:
            print(f"  {query}")
            print(f"  https://github.com/search?type=code&q={quote(query)}")

    # Check specific domain
    if args.domain:
        print(f"\n[*] Checking domain: {args.domain}")
        findings['exposed_files'] = scanner.search_common_endpoints(args.domain)

    # Generate report
    report = scanner.generate_report(findings)
    print(report)

    # Save to file
    if args.output:
        try:
            # Explicit encoding so the result does not depend on the locale.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
                f.write("\n\n[RAW JSON DATA]\n")
                json.dump(findings, f, indent=2)
        except OSError as e:
            print(f"\n[!] Could not write report to {args.output}: {e}")
            sys.exit(1)
        print(f"\n[+] Report saved to: {args.output}")
        # The JSON section holds unmasked matches and full file contents.
        print("[!] The saved file contains unmasked findings; store it securely.")


# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
