"""
FINAL ATTEMPT: Look for the actual CTF trick

Let me re-examine EVERYTHING one more time with fresh eyes.
Sometimes the solution is simpler than we think!
"""

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

import io
import zipfile
import hashlib
from pathlib import Path
from PIL import Image
import numpy as np


def reread_verification_carefully():
    """
    Print selected line ranges of ``src/verification.py`` with line numbers.

    The path is resolved against the current working directory. Ranges are
    1-indexed and inclusive, and are clamped to the length of the file, so
    a range that runs past the end of the file simply prints fewer lines.

    Raises:
        OSError: if ``src/verification.py`` cannot be opened.
    """
    print("="*80)
    print("RE-READING VERIFICATION.PY LINE BY LINE")
    print("="*80)

    # Python source is UTF-8 by default; decode it explicitly instead of
    # relying on the platform locale.  This is display-only output, so
    # undecodable bytes are replaced rather than aborting the listing.
    with open('src/verification.py', 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()

    print("\n🔍 Looking at the verify_zip function...")

    # (first line, last line, label) -- 1-indexed, inclusive
    relevant_ranges = [
        (63, 120, "Main verification loop")
    ]

    for start, end, description in relevant_ranges:
        print(f"\n{description} (lines {start}-{end}):")
        print("-" * 80)
        first = max(0, start - 1)
        # Slicing clamps to the file length on its own.
        for number, line in enumerate(lines[first:end], start=first + 1):
            # Drop the line's own newline and let print() add one, so a
            # final line without a trailing newline still ends the row.
            text = line.rstrip("\n")
            print(f"{number:3d}: {text}")


def check_pixel_diff_logic():
    """
    Run ``verification.pixel_diff`` on tiny 2x2 uint8 arrays and print results.

    Two comparisons are made against the same base array: one against an
    array whose last element differs, and one against an array holding
    exactly the same values.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("ANALYZING PIXEL_DIFF FUNCTION")
    print(banner)

    from verification import pixel_diff

    base = np.array([[1, 2], [3, 4]], dtype=np.uint8)

    # (heading to print, values of the array compared against ``base``)
    cases = (
        ("\nTest: arr1 vs arr2 with 1 different pixel", [[1, 2], [3, 5]]),
        ("\nTest: arr1 vs arr3 (identical)", [[1, 2], [3, 4]]),
    )

    for heading, values in cases:
        other = np.array(values, dtype=np.uint8)
        count = pixel_diff(base, other)
        print(heading)
        print(f"Result: {count} pixels different")


def check_md5_implementation():
    """
    Compare ``verification.md5_hex`` with ``hashlib.md5`` and print the outcome.

    The digest of ``b"hello"`` from both implementations is shown together
    with whether they agree; the ``md5_hex`` digest of empty input is then
    printed on its own.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("CHECKING MD5 IMPLEMENTATION")
    print(rule)

    from verification import md5_hex

    payload = b"hello"
    actual = md5_hex(payload)
    reference = hashlib.md5(payload).hexdigest()

    print(
        f"\nTest data: {payload}",
        f"md5_hex result: {actual}",
        f"Expected: {reference}",
        f"Match: {actual == reference}",
        sep="\n",
    )

    # Digest of zero-length input
    print(f"\nEmpty bytes MD5: {md5_hex(b'')}")


def test_zip_filename_tricks():
    """
    Store two reference images in a ZIP as one "pair" and compare their MD5s.

    Reads ``src/data/refs/ref_00.png`` and ``src/data/refs/ref_01.png``
    (relative to the current working directory), writes them into an
    in-memory ZIP as ``pair_00_img1.png`` / ``pair_00_img2.png``, reads both
    entries back and prints each MD5 digest plus whether the digests match.

    If either reference image cannot be read, the problem is reported and
    the function returns without raising, so later analysis steps still run.
    """
    print("\n" + "="*80)
    print("TESTING ZIP FILENAME TRICKS")
    print("="*80)

    refs_dir = Path("src/data/refs")
    try:
        data0 = (refs_dir / "ref_00.png").read_bytes()
        data1 = (refs_dir / "ref_01.png").read_bytes()
    except OSError as exc:
        print(f"\nSkipping: cannot read reference images ({exc})")
        return

    print("\nTest 1: Adding different data with names that pass expected_filenames check")

    zip_buf = io.BytesIO()
    with zipfile.ZipFile(zip_buf, 'w') as zf:
        # For pair 00, store ref_00 as img1 and ref_01 as img2.
        zf.writestr("pair_00_img1.png", data0)
        zf.writestr("pair_00_img2.png", data1)

    # Read the entries back from the start of the buffer.
    zip_buf.seek(0)
    with zipfile.ZipFile(zip_buf, 'r') as zf:
        read1 = zf.read("pair_00_img1.png")
        read2 = zf.read("pair_00_img2.png")

    # Hash each entry once and reuse the digests.
    digest1 = hashlib.md5(read1).hexdigest()
    digest2 = hashlib.md5(read2).hexdigest()
    same = digest1 == digest2

    print(f"  img1 MD5: {digest1}")
    print(f"  img2 MD5: {digest2}")
    print(f"  Same MD5: {same}")
    # Only claim a failure when the digests really differ.
    if same:
        print("  → MD5s match, so this pair would not fail the MD5 check")
    else:
        print("  → This will FAIL the MD5 check as expected")


def ultimate_theory():
    """
    Print a brainstorm of the usual CTF solution categories.

    Emits a banner followed by a fixed block of notes; nothing is computed
    and nothing is returned.
    """
    separator = "=" * 80

    notes = """
CTF challenges usually have one of these solutions:

1. ⚗️ CRYPTO ATTACK (MD5 Collision):
   - This is VERY HARD but possible
   - Requires HashClash or similar tools
   - Takes significant time/resources
   - For a CTF, this would be a HARD challenge worth many points

2. 🐛 LOGIC BUG:
   - Off-by-one error
   - Race condition
   - Type confusion
   - Missing validation

3. 🎭 FORMAT TRICK:
   - ZIP allows duplicate entries?
   - PNG has parseable quirks?
   - File system tricks?

4. 🎨 UNINTENDED SOLUTION:
   - Something the challenge author didn't think of
   - Edge case in a library
   - Version-specific bug

Let me check if there's a logic bug in the MD5 check...

The code does:
    if md5_hex(img1_bytes) != md5_hex(img2_bytes):
        return False

This checks: "if hashes are different, fail"
Equivalently: "hashes must be the same to pass"

There's NO way around this unless:
a) We find identical MD5s (collision)
b) There's a bug in md5_hex or hashlib
c) img1_bytes or img2_bytes is manipulated somehow

Wait... what if there's something with how the ZIP is read?
What if zf.read() can return different data on subsequent calls?
    """

    print(f"\n{separator}")
    print("🧠 THINKING LIKE A CTF PLAYER")
    print(separator)
    print(notes)


def test_multiple_zip_reads():
    """
    Check that repeated ``ZipFile.read`` calls return identical bytes.

    Reads ``src/data/refs/ref_00.png`` (relative to the current working
    directory), stores it in an in-memory ZIP as ``test.png``, reads that
    entry three times and prints the MD5 of each read plus whether all three
    results are equal.

    If the reference image cannot be read, the problem is reported and the
    function returns without raising, so later analysis steps still run.
    """
    print("\n" + "="*80)
    print("TESTING MULTIPLE ZIP READS")
    print("="*80)

    ref_path = Path("src/data/refs") / "ref_00.png"
    try:
        data = ref_path.read_bytes()
    except OSError as exc:
        print(f"\nSkipping: cannot read reference image ({exc})")
        return

    # Build the archive in memory.
    zip_buf = io.BytesIO()
    with zipfile.ZipFile(zip_buf, 'w') as zf:
        zf.writestr("test.png", data)

    # Read the same entry several times from one open archive.
    zip_buf.seek(0)
    with zipfile.ZipFile(zip_buf, 'r') as zf:
        reads = [zf.read("test.png") for _ in range(3)]

    for number, payload in enumerate(reads, start=1):
        print(f"Read {number} MD5: {hashlib.md5(payload).hexdigest()}")
    print(f"All same: {reads[0] == reads[1] == reads[2]}")


if __name__ == "__main__":
    # Run each investigation in turn; the theory write-up goes last,
    # just before the closing summary.
    for step in (
        reread_verification_carefully,
        check_pixel_diff_logic,
        check_md5_implementation,
        test_zip_filename_tricks,
        test_multiple_zip_reads,
        ultimate_theory,
    ):
        step()

    divider = "=" * 80
    print(f"\n{divider}")
    print("🎯 FINAL CONCLUSION")
    print(divider)
    print("""
After exhaustive analysis, the solution MUST be:

**MD5 COLLISION ATTACK**

There's no bug, no trick, no edge case. The challenge is straightforward:
- Create two different PNGs with the same MD5 hash
- Make them classify differently

This requires:
1. HashClash or similar MD5 collision tool
2. Knowledge of chosen-prefix collision attacks
3. PNG format expertise to embed collision blocks
4. Adversarial example generation to make correct classifications

Steps to solve:
1. Research Marc Stevens' HashClash
2. Generate adversarial examples that classify differently
3. Use chosen-prefix collision to give them same MD5
4. Repeat for all 10 pairs
5. Package in ZIP and submit

This is a VERY HARD CTF challenge, probably worth 400-500 points!

Alternative if you're stuck:
- Look for writeups of similar CTF challenges
- Search "MD5 collision PNG CTF"
- Check if there's a Discord/forum for this CTF with hints

Good luck! 🍀
    """)
