"""
CRITICAL DISCOVERY TEST

Testing if PIL's context manager + convert() has any quirks
"""

import io
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from pathlib import Path
from PIL import Image
import numpy as np
import hashlib


def test_context_manager_convert_bug():
    """
    Load the same PNG bytes twice and compare the resulting grayscale images.

    The pattern under investigation is::

        with Image.open(io.BytesIO(data)) as img:
            return img.convert("L")

    Hypotheses being probed: does the converted image share state with the
    image closed by the ``with`` block, and do repeated calls behave
    differently? The two results are compared by identity, pixel content,
    mode/size/readonly attributes and finally by the ``predict_top1``
    result for each. Findings are printed; nothing is asserted or returned.
    """
    banner = "=" * 80
    print(banner)
    print("TESTING CONTEXT MANAGER + CONVERT PATTERN")
    print(banner)

    raw = (Path("src/data/refs") / "ref_00.png").read_bytes()

    def load_gray_from_bytes(data: bytes):
        # Each call wraps the bytes in its own fresh BytesIO.
        with Image.open(io.BytesIO(data)) as opened:
            return opened.convert("L")

    # Two independent loads of identical bytes.
    first = load_gray_from_bytes(raw)
    second = load_gray_from_bytes(raw)
    labelled = (("img1", first), ("img2", second))

    # Identity: are we handed distinct Python objects?
    print(f"\nSame object: {first is second}")
    for label, image in labelled:
        print(f"{label} id: {id(image)}")

    # Pixel content.
    pixels_a = np.array(first)
    pixels_b = np.array(second)
    print(f"\nArrays equal: {np.array_equal(pixels_a, pixels_b)}")
    print(f"Pixel diff count: {np.sum(pixels_a != pixels_b)}")

    # Basic image attributes.
    print(f"\nimg1.mode: {first.mode}, img2.mode: {second.mode}")
    print(f"img1.size: {first.size}, img2.size: {second.size}")

    # 'readonly' may be absent on the image object; fall back to 'N/A'.
    for label, image in labelled:
        print(f"{label}.readonly: {getattr(image, 'readonly', 'N/A')}")

    # Imported here (not at module top) so the earlier checks run first.
    from model import get_model_bundle, predict_top1, set_deterministic

    set_deterministic(0)
    bundle = get_model_bundle(device="cpu")

    first_pred = predict_top1(first, bundle=bundle)
    second_pred = predict_top1(second, bundle=bundle)

    print(f"\nPrediction 1: {first_pred}")
    print(f"Prediction 2: {second_pred}")
    print(f"Same classification: {first_pred['id'] == second_pred['id']}")


def test_file_object_reuse():
    """
    Decode twice from one shared BytesIO without seeking it in between.

    Prints the stream position after the first decode, then tries a second
    decode from the very same object. On success the two pixel arrays are
    compared; on any exception the error is printed instead. Nothing is
    asserted or returned.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("TESTING BytesIO REUSE")
    print(rule)

    payload = (Path("src/data/refs") / "ref_00.png").read_bytes()

    # A single stream shared by both decodes; this function never seeks it.
    shared_stream = io.BytesIO(payload)

    def load_no_seek(bio_obj):
        with Image.open(bio_obj) as opened:
            return opened.convert("L")

    first = load_no_seek(shared_stream)
    # Report where the first decode left the stream position.
    print(f"BytesIO position after first load: {shared_stream.tell()}")

    # NOTE(review): Pillow's Image.open documentation says a passed file
    # object is sought to zero before reading, which would make the missing
    # seek irrelevant and this second load succeed - confirm against the
    # installed Pillow version.
    try:
        second = load_no_seek(shared_stream)
        print("Second load succeeded!")

        pixels_a = np.array(first)
        pixels_b = np.array(second)
        print(f"Arrays equal: {np.array_equal(pixels_a, pixels_b)}")
    except Exception as err:
        # Deliberately broad: this is an experiment and any failure is data.
        print(f"Second load failed: {err}")
        print("This could be a clue!")


def test_with_different_png_types():
    """
    Check prediction consistency for the first three reference images.

    For each of ``ref_00.png`` .. ``ref_02.png`` the file bytes are read
    once, decoded to grayscale three separate times, and each decode is
    passed to ``predict_top1``. The collected ``'id'`` values and whether
    they all agree are printed per image. Nothing is asserted or returned.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("TESTING WITH DIFFERENT REFERENCE IMAGES")
    print(rule)

    from model import get_model_bundle, predict_top1, set_deterministic

    set_deterministic(0)
    bundle = get_model_bundle(device="cpu")

    def load_gray(data):
        # Fresh BytesIO per decode, same pattern as the code under test.
        with Image.open(io.BytesIO(data)) as opened:
            return opened.convert("L")

    refs_dir = Path("src/data/refs")

    for idx in range(3):  # only the first three references
        tag = f"ref_{idx:02d}"
        raw = (refs_dir / f"{tag}.png").read_bytes()

        # Three independent decode + predict rounds on identical bytes.
        predicted_ids = [
            predict_top1(load_gray(raw), bundle=bundle)['id']
            for _ in range(3)
        ]

        consistent = len(set(predicted_ids)) == 1
        print(f"{tag}: classifications = {predicted_ids}, consistent = {consistent}")


def deep_dive_md5_check():
    """
    Print a walkthrough of the MD5 equality check used by the verification.

    Restates the check (differing MD5s cause a failure), raises the question
    of empty/None inputs, and prints the MD5 hex digest of empty bytes as a
    reference value. Purely informational: nothing is asserted or returned.
    """
    rule = "=" * 80
    empty_digest = hashlib.md5(b"").hexdigest()

    # Every entry is emitted with its own print() call, in this order.
    report = (
        "\n" + rule,
        "DEEP DIVE INTO MD5 CHECK",
        rule,
        "\nThe verification does:",
        "  if md5_hex(img1_bytes) != md5_hex(img2_bytes):",
        "      return False",
        "\nThis means: if MD5s are DIFFERENT, fail",
        "So MD5s must be IDENTICAL to pass",
        "\n💡 But wait... what if img1_bytes or img2_bytes is empty or None?",
        # Edge case: digest of zero-length input.
        f"\nMD5 of empty bytes: {empty_digest}",
        "\nWhat if verification has a bug where it doesn't properly validate?",
        "Let me check if there's any way the check could be bypassed...",
    )
    for line in report:
        print(line)


if __name__ == "__main__":
    # Run every experiment, in this fixed order.
    for experiment in (
        test_context_manager_convert_bug,
        test_file_object_reuse,
        test_with_different_png_types,
        deep_dive_md5_check,
    ):
        experiment()

    divider = "=" * 80
    print("\n" + divider)
    print("💡 BREAKTHROUGH INSIGHT NEEDED")
    print(divider)

    # Closing notes: candidate directions once the experiments above have run.
    closing_notes = """
After all these tests, everything is deterministic!

For a CTF challenge, the most likely solutions are:

1. 🎯 MD5 COLLISION ATTACK (MOST LIKELY):
   This is a known technique in CTF challenges.
   - Use tools like HashClash to create chosen-prefix collisions
   - Create two different PNGs that hash to same MD5
   - Make one classify as digit X, other as digit Y
   - This is HARD but doable with right tools

2. 🐛 SUBTLE BUG WE HAVEN'T FOUND:
   - Maybe there's a race condition?
   - Maybe there's something with how ZIP stores files?
   - Maybe PIL has a version-specific bug?

3. 🎨 PNG CHUNK MANIPULATION:
   - Maybe we can create PNGs that PIL parses differently
   - Could use ancillary chunks creatively
   - Gamma correction, color profiles, etc.

4. 📝 THE CHALLENGE IS ACTUALLY IMPOSSIBLE:
   - And the point is to recognize that?
   - "Nothing Ever Changes" = philosophical commentary?

Let me create a guide for approach #1 (MD5 collisions)...
    """
    print(closing_notes)
