#!/usr/bin/env python3
"""
Alternative methods to extract and decode the payload from Invoice.docm
"""
import re
import base64
import xml.etree.ElementTree as ET

def xor_decrypt(text, key):
    """XOR each character of ``text`` with a repeating ``key``.

    The key is cycled: character ``i`` of ``text`` is XORed with character
    ``i % len(key)`` of ``key``. XOR is its own inverse, so applying the
    function twice with the same key returns the original text.

    Args:
        text: String to transform.
        key: Non-empty string used as the repeating XOR key.

    Returns:
        A string of the same length as ``text``.

    Raises:
        ValueError: If ``key`` is empty while ``text`` is not.
    """
    # Nothing to transform; an empty input has always yielded an empty result.
    if not text:
        return ''
    # Fail with a clear message instead of an opaque modulo-by-zero error.
    if not key:
        raise ValueError("key must not be empty")

    key_len = len(key)
    return ''.join(
        chr(ord(char) ^ ord(key[i % key_len]))
        for i, char in enumerate(text)
    )

print("=" * 70)
print("METHOD 1: Extract from document.xml using regex")
print("=" * 70)

try:
    # Imported locally: only this regex-based method works on raw,
    # still-escaped XML and therefore has to decode entities itself.
    from html import unescape

    # Read the document.xml file
    with open('extracted_invoice/word/document.xml', 'r', encoding='utf-8') as f:
        xml_content = f.read()

    # Look for the first w:txbxContent element, which contains textbox content
    textbox_match = re.search(r'<w:txbxContent>(.*?)</w:txbxContent>', xml_content, re.DOTALL)

    if textbox_match:
        textbox_content = textbox_match.group(1)

        # Extract the text inside every w:t element of the textbox.
        # The tag name must be followed by whitespace or '>' so that sibling
        # tags such as <w:tab/>, <w:tbl> or <w:tc> are not mistaken for w:t,
        # and the lookbehind rejects a self-closing <w:t .../> (no content).
        text_matches = re.findall(
            r'<w:t(?:\s[^>]*)?(?<!/)>(.*?)</w:t>', textbox_content, re.DOTALL
        )

        if text_matches:
            # Combine all text pieces, decoding XML entities (&amp;, &lt;, ...)
            # so the result matches what an XML parser would return.
            full_text = ''.join(unescape(piece) for piece in text_matches)
            print(f"Found textbox content with {len(full_text)} characters")

            # Split by pipe as the macro does
            parts = full_text.split('|')
            print(f"Split into {len(parts)} parts")

            if len(parts) >= 3:
                # The third pipe-separated field is the encoded payload
                payload = parts[2]
                print(f"Payload length: {len(payload)} characters")
                print(f"First 100 chars: {payload[:100]}")

                # Decrypt: XOR with the key yields base64 text, which is then decoded
                decrypted = xor_decrypt(payload, "ph15h1n9")
                flag = base64.b64decode(decrypted).decode('utf-8', errors='ignore')

                print("\n" + "=" * 70)
                print("FLAG FOUND:")
                print("=" * 70)
                print(flag)
                print("=" * 70)
            else:
                print(f"ERROR: Expected at least 3 parts, got {len(parts)}")
        else:
            print("No w:t tags found in textbox")
    else:
        print("No textbox content found")

except Exception as e:
    # Best-effort script: report the failure and continue with the next method
    print(f"Error in Method 1: {e}")
    import traceback
    traceback.print_exc()

# Section banner (three lines, emitted in one call)
print(
    "\n" + "=" * 70,
    "METHOD 2: Extract using XML parsing with namespaces",
    "=" * 70,
    sep="\n",
)

try:
    # Load the main document part with a real XML parser
    tree = ET.parse('extracted_invoice/word/document.xml')
    root = tree.getroot()

    # Prefix -> URI map so the path queries below can use the short "w:" form
    namespaces = {
        'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
        'v': 'urn:schemas-microsoft-com:vml',
        'wps': 'http://schemas.microsoft.com/office/word/2010/wordprocessingShape'
    }

    # Collect every textbox body anywhere in the document
    textboxes = root.findall('.//w:txbxContent', namespaces)
    print(f"Found {len(textboxes)} textboxes")

    for idx, textbox in enumerate(textboxes):
        # Text runs contained in this textbox
        text_elements = textbox.findall('.//w:t', namespaces)
        if not text_elements:
            continue

        # Concatenate the non-empty runs into a single string
        combined_text = ''.join(elem.text for elem in text_elements if elem.text)
        if '|' not in combined_text:
            # Only pipe-delimited textboxes are candidates for the payload
            continue

        print(f"\nTextbox {idx + 1}:")
        print(f"  Length: {len(combined_text)}")
        print(f"  Preview: {combined_text[:80]}...")

        parts = combined_text.split('|')
        if len(parts) < 3:
            continue

        # Third field is the payload: XOR with the key, then base64-decode
        payload = parts[2]
        decrypted = xor_decrypt(payload, "ph15h1n9")
        flag = base64.b64decode(decrypted).decode('utf-8', errors='ignore')
        print(f"  Decoded flag: {flag}")

except Exception as e:
    print(f"Error in Method 2: {e}")
    import traceback
    traceback.print_exc()

print("\n" + "=" * 70)
print("METHOD 3: Using olefile to directly access OLE streams")
print("=" * 70)

try:
    # Optional third-party dependency; its absence is reported below
    import olefile

    if olefile.isOleFile('Invoice.docm'):
        print("Invoice.docm is an OLE file")
        ole = olefile.OleFileIO('Invoice.docm')
        try:
            # List all streams
            print("\nAvailable streams:")
            for stream in ole.listdir():
                # Each entry is a list of path components; show it as a path
                print(f"  {'/'.join(stream)}")
        finally:
            # Release the file handle even if listing the streams fails
            ole.close()
    else:
        print("Invoice.docm is not an OLE file (it's a ZIP-based OpenXML format)")

except ImportError:
    print("olefile module not installed")
except Exception as e:
    print(f"Error in Method 3: {e}")

# Informational only: prints a PowerShell snippet that performs the same
# textbox extraction; nothing is executed here. All pieces are emitted in a
# single call, one per line.
print(
    "\n" + "=" * 70,
    "METHOD 4: Using PowerShell XML parsing",
    "=" * 70,
    "You can also extract the payload using PowerShell:",
    """
$xml = [xml](Get-Content extracted_invoice\\word\\document.xml)
$ns = New-Object System.Xml.XmlNamespaceManager($xml.NameTable)
$ns.AddNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main')
$textbox = $xml.SelectSingleNode('//w:txbxContent', $ns)
$text = $textbox.SelectNodes('.//w:t', $ns) | ForEach-Object { $_.'#text' } | Join-String
$parts = $text -split '\\|'
$payload = $parts[2]
Write-Host "Payload: $payload"
""",
    sep="\n",
)
