"""
CTF Challenge Solution Exploration

Let's think like a CTF player and find the trick!
"""

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

import io
import zipfile
import hashlib
from pathlib import Path
from PIL import Image
import numpy as np


def explore_verification_bug():
    """
    Hunt for logic flaws in the checker.

    The verifier pulls both images out of the ZIP and compares their MD5
    digests; this walks through each step looking for an exploitable quirk.
    """
    banner = "=" * 80
    print(banner)
    print("LOOKING FOR VERIFICATION BUGS")
    print(banner)

    steps = (
        "1. Read img1_bytes and img2_bytes from ZIP",
        "2. Load them with PIL: img1 = load_gray_from_bytes(img1_bytes)",
        "3. Check pixel differences",
        "4. Check: if md5_hex(img1_bytes) != md5_hex(img2_bytes): return False",
        "5. Check classifications",
    )
    print("\nKey verification steps:")
    for step in steps:
        print(step)

    print("\n🤔 Wait... let me check something about the MD5 comparison...")

    # The check rejects when the digests differ, i.e. equality is REQUIRED.
    # Open question: do None/empty inputs behave sanely in that comparison?

    print("\nWhat if we try edge cases?")
    edge_cases = (
        "- Empty bytes?",
        "- None values?",
        "- What about the ZIP file structure itself?",
    )
    for case in edge_cases:
        print(case)


def explore_png_tricks():
    """
    Survey PNG chunk types for anything a decoder might mishandle.

    Prints the critical vs. ancillary chunk split, then the idea of
    probing PIL for stateful/non-deterministic chunk handling.
    """
    bar = "=" * 80
    print("\n" + bar)
    print("EXPLORING PNG FORMAT TRICKS")
    print(bar)

    # Decoders must process these; dict preserves insertion order (3.7+).
    critical = {
        "IHDR": "Image header",
        "PLTE": "Palette",
        "IDAT": "Image data",
        "IEND": "Image trailer",
    }
    print("\nPNG Critical Chunks (MUST be processed):")
    for tag, desc in critical.items():
        print(f"  {tag} - {desc}")

    # Decoders are free to skip these.
    ancillary = {
        "tEXt": "Textual data",
        "gAMA": "Gamma correction",
        "pHYs": "Physical pixel dimensions",
        "tIME": "Last modification time",
    }
    print("\nPNG Ancillary Chunks (CAN be ignored):")
    for tag, desc in ancillary.items():
        print(f"  {tag} - {desc}")

    print("\n💡 Idea: What if PIL has non-deterministic behavior with certain chunks?")
    print("Let me test if PIL caches or has state between loads...")


def test_pil_state():
    """
    Test if PIL has any stateful behavior that could be exploited.

    Three probes, all decoding the same reference PNG bytes:
      1. two Image.open() calls sharing a single BytesIO (seek(0) between),
      2. two Image.open() calls on independent BytesIO objects,
      3. convert("L") inside a `with` block, pixels read after exit.
    Each probe prints whether the decoded grayscale arrays are equal.
    """
    print("\n" + "="*80)
    print("TESTING PIL STATE BEHAVIOR")
    print("="*80)
    
    # Reference image shipped with the challenge; read the raw bytes once
    # so every probe decodes exactly the same input.
    refs_dir = Path("src/data/refs")
    ref_path = refs_dir / "ref_00.png"
    
    with open(ref_path, 'rb') as f:
        img_bytes = f.read()
    
    # Test: Does PIL's Image.open have any global state?
    from PIL import Image as PILImage
    
    # Load same bytes multiple times in different ways
    print("\nTest 1: Multiple loads from same BytesIO")
    bio = io.BytesIO(img_bytes)
    img1 = PILImage.open(bio)
    bio.seek(0)  # Image.open is lazy; rewind so the second open parses from byte 0
    img2 = PILImage.open(bio)
    
    # convert('L') forces the (lazy) decode; compare the decoded pixels.
    arr1 = np.array(img1.convert('L'))
    arr2 = np.array(img2.convert('L'))
    print(f"  Same result: {np.array_equal(arr1, arr2)}")
    
    print("\nTest 2: Load with different BytesIO objects")
    img3 = PILImage.open(io.BytesIO(img_bytes))
    img4 = PILImage.open(io.BytesIO(img_bytes))
    arr3 = np.array(img3.convert('L'))
    arr4 = np.array(img4.convert('L'))
    print(f"  Same result: {np.array_equal(arr3, arr4)}")
    
    print("\nTest 3: Check if context manager affects anything")
    def load_with_context(data):
        # convert() runs before __exit__ closes the stream, so the returned
        # image holds already-decoded data independent of the BytesIO.
        with PILImage.open(io.BytesIO(data)) as img:
            return img.convert("L")
    
    img5 = load_with_context(img_bytes)
    img6 = load_with_context(img_bytes)
    
    # Try to access pixels after context exits
    try:
        arr5 = np.array(img5)
        arr6 = np.array(img6)
        print(f"  Same result: {np.array_equal(arr5, arr6)}")
        print(f"  Images still accessible after context: True")
    except Exception as e:
        print(f"  Error: {e}")


def explore_zip_tricks():
    """
    Probe ZIP container behavior for exploitable properties.

    Builds an in-memory archive holding the same payload under two
    names, reads both entries back, and prints their MD5 digests.
    """
    bar = "=" * 80
    print("\n" + bar)
    print("EXPLORING ZIP FILE TRICKS")
    print(bar)

    print("\n🎯 KEY INSIGHT: What if ZIP allows duplicate filenames?")
    print("Or what if we can create a ZIP with special properties?")

    # Test: Can we create a ZIP with the same file added twice?
    print("\nTest: Creating ZIP with duplicate entries...")

    refs = Path("src/data/refs")
    payload_a = (refs / "ref_00.png").read_bytes()
    payload_b = (refs / "ref_01.png").read_bytes()  # loaded for later experiments

    # Write the same payload under two distinct entry names.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr("test_img1.png", payload_a)
        zf.writestr("test_img2.png", payload_a)  # Same data

    # Read it back and confirm both entries round-trip identically.
    archive.seek(0)
    with zipfile.ZipFile(archive, 'r') as zf:
        back1 = zf.read("test_img1.png")
        back2 = zf.read("test_img2.png")
        print(f"  Data identical: {back1 == back2}")
        print(f"  MD5 img1: {hashlib.md5(back1).hexdigest()}")
        print(f"  MD5 img2: {hashlib.md5(back2).hexdigest()}")

    print("\n💡 Hmm, that's expected behavior...")
    print("Let me think of other ZIP tricks...")


def check_model_preprocessing():
    """
    Check the model's preprocessing pipeline for non-determinism.

    Decodes a reference image, then runs torchvision's antialiased
    resize five times on the same tensor and compares the raw output
    bytes; any mismatch would mean the pipeline itself is exploitable.
    """
    print("\n" + "="*80)
    print("CHECKING MODEL PREPROCESSING PIPELINE")
    print("="*80)
    
    # NOTE(review): get_model_bundle, _preprocess_tensor (from model) and
    # torch were imported here but never used; the dead imports are dropped.
    from torchvision.transforms import functional as TF
    
    print("\nPreprocessing steps:")
    print("1. Convert to grayscale (if needed)")
    print("2. Resize to [28, 28] with antialias=True")
    print("3. Center crop to [28, 28]")
    print("4. Normalize with MNIST mean/std")
    
    print("\n🔍 The resize uses antialias=True...")
    print("Could there be any non-determinism in the antialiasing?")
    
    # Decode the reference image and lift it to a (1, C, H, W) float tensor.
    refs_dir = Path("src/data/refs")
    ref_path = refs_dir / "ref_00.png"
    img = Image.open(ref_path).convert('L')
    
    tensor = TF.to_tensor(img).unsqueeze(0)
    
    # Run the resize repeatedly; compare the exact output bytes so even a
    # single-ULP float difference would be detected.
    results = []
    for _ in range(5):
        resized = TF.resize(tensor, [28, 28], antialias=True)
        results.append(resized.numpy().tobytes())
    
    all_same = all(r == results[0] for r in results)
    print(f"\nResize determinism test: {'PASS' if all_same else 'FAIL'}")
    
    if not all_same:
        print("⚠️ FOUND NON-DETERMINISM IN RESIZE!")


def ultimate_insight():
    """
    Re-read the verification code and write up the core realization:
    equal-bytes inputs must somehow decode/classify differently.
    """
    bar = "=" * 80
    print("\n" + bar)
    print("💡 WAIT... LET ME RE-READ THE VERIFICATION CODE")
    print(bar)

    analysis = """
Looking at verification.py lines 78-82:

    img1_bytes = zf.read(img1_name)
    img2_bytes = zf.read(img2_name)
    
    try:
        img1 = load_gray_from_bytes(img1_bytes)
        img2 = load_gray_from_bytes(img2_bytes)

And line 101:
    if md5_hex(img1_bytes) != md5_hex(img2_bytes):
        return False

So img1_bytes and img2_bytes MUST be identical...

But then lines 84-99 check:
    img1_arr = image_to_uint8(img1)
    img2_arr = image_to_uint8(img2)
    
    if pixel_diff(ref_arr, img1_arr) != 0:
        return False  # img1 must match reference exactly
    
    diff_img2_img1 = pixel_diff(ref_arr, img2_arr)  # NOTE: compares to ref, not img1!
    if diff_img2_img1 > budget:
        return False

🤯 WAIT A SECOND!!!

The variable is named "diff_img2_img1" but it actually computes:
    pixel_diff(ref_arr, img2_arr)

This compares img2 to REFERENCE, not to img1!

So the checks are:
1. img1 must match reference exactly (0 pixel diff)
2. img2 must be within budget of reference
3. img1_bytes == img2_bytes (same MD5)
4. img1 and img2 must classify differently

If img1_bytes == img2_bytes, then img1 and img2 should be identical images...
But somehow they must classify differently?

🎯 THE REAL QUESTION: Is there ANY way for load_gray_from_bytes() to return
different results when called with the same bytes?

Let me look for:
1. Global state in PIL
2. Random number generators
3. Caching issues
4. File pointer positions
5. Context manager bugs
    """
    print(analysis)


if __name__ == "__main__":
    # Run each exploration pass in order, then summarize candidate attacks.
    probes = (
        explore_verification_bug,
        explore_png_tricks,
        test_pil_state,
        explore_zip_tricks,
        check_model_preprocessing,
        ultimate_insight,
    )
    for probe in probes:
        probe()

    bar = "=" * 80
    print("\n" + bar)
    print("🎯 POSSIBLE SOLUTION PATHS")
    print(bar)
    print("""
1. MD5 COLLISION (Most Realistic for CTF):
   - Use HashClash or similar tools
   - Create two different PNGs with same MD5
   - Modify them to classify differently
   - Tools: hashclash, unicoll, fastcoll

2. FIND A BUG IN VERIFICATION:
   - Review verification.py line by line
   - Look for logic errors
   - Test edge cases

3. PIL NON-DETERMINISM:
   - Check PIL version for known bugs
   - Test with corrupted/malformed PNGs
   - Try PNGs with unusual chunk combinations

4. PREPROCESSING TRICKS:
   - Find non-determinism in torch resize
   - Exploit floating point precision
   - Cache invalidation

Let me create a script to help with approach #1 (MD5 collisions)...
    """)
