#!/usr/bin/env python3
"""
Script to try out several different approaches for fetching Instagram
reel metadata (mobile page, public API, GraphQL, Googlebot user-agent).
"""

import requests
import re
import json
from bs4 import BeautifulSoup
import logging
import time

# Logging configuration: INFO level on the root logger.
# NOTE(review): `logger` is defined but the script reports via print() —
# kept for compatibility in case other code imports it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def _print_meta_info(html, label):
    """Parse *html* and print description/duration <meta> tags, prefixed with *label*."""
    soup = BeautifulSoup(html, 'html.parser')
    for meta in soup.find_all('meta'):
        property_attr = meta.get('property', '')
        content_attr = meta.get('content', '')
        if 'description' in property_attr.lower():
            print(f"✅ {label} description: {content_attr[:100]}...")
        elif 'duration' in property_attr.lower():
            print(f"✅ {label} duration: {content_attr}")


def _test_mobile_page(session, url):
    """Test 1: fetch the reel through the m.instagram.com mobile host."""
    print("\n📱 Test 1: URL mobile")
    mobile_url = url.replace('instagram.com', 'm.instagram.com')
    response = session.get(mobile_url, timeout=30)
    print(f"Status: {response.status_code}")
    print(f"Content length: {len(response.text)}")

    if response.status_code == 200:
        with open('instagram_mobile.html', 'w', encoding='utf-8') as f:
            f.write(response.text)
        print("✅ Contenu mobile sauvegardé")
        _print_meta_info(response.text, "Mobile")


def _test_public_api(session, url, headers, shortcode):
    """Test 2: hit the media-info API endpoint (usually login-gated)."""
    print("\n🔌 Test 2: API publique")
    api_url = f"https://www.instagram.com/api/v1/media/{shortcode}/info/"

    api_headers = headers.copy()
    api_headers.update({
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': url
    })

    response_api = session.get(api_url, headers=api_headers, timeout=30)
    print(f"API Status: {response_api.status_code}")

    if response_api.status_code == 200:
        try:
            api_data = response_api.json()
            print(f"✅ API data: {str(api_data)[:200]}...")
            with open('instagram_api.json', 'w', encoding='utf-8') as f:
                json.dump(api_data, f, indent=2)
            print("✅ Données API sauvegardées")
        except Exception as e:
            print(f"❌ Erreur parsing API: {e}")
    else:
        print(f"❌ API non accessible: {response_api.status_code}")


def _test_graphql(session, url, headers, shortcode):
    """Test 3: query the GraphQL endpoint with a known media query hash."""
    print("\n📊 Test 3: GraphQL endpoint")
    graphql_url = "https://www.instagram.com/graphql/query/"

    graphql_headers = headers.copy()
    graphql_headers.update({
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': url,
        'Content-Type': 'application/json'
    })

    # GraphQL query to retrieve the media info for this shortcode.
    # NOTE(review): the query_hash is a hard-coded value scraped from the
    # web client and may rotate — verify it still resolves.
    graphql_query = {
        "query_hash": "9f8827793ef34641b2fb195d4d41151c",
        "variables": json.dumps({
            "shortcode": shortcode
        })
    }

    response_graphql = session.get(graphql_url, params=graphql_query,
                                   headers=graphql_headers, timeout=30)
    print(f"GraphQL Status: {response_graphql.status_code}")

    if response_graphql.status_code == 200:
        try:
            graphql_data = response_graphql.json()
            print(f"✅ GraphQL data: {str(graphql_data)[:200]}...")
            with open('instagram_graphql.json', 'w', encoding='utf-8') as f:
                json.dump(graphql_data, f, indent=2)
            print("✅ Données GraphQL sauvegardées")
        except Exception as e:
            print(f"❌ Erreur parsing GraphQL: {e}")
    else:
        print(f"❌ GraphQL non accessible: {response_graphql.status_code}")


def _test_googlebot(url):
    """Test 4: fetch the desktop URL pretending to be Googlebot."""
    print("\n🤖 Test 4: User-Agent différent")
    headers_bot = {
        'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive'
    }

    session_bot = requests.Session()
    session_bot.headers.update(headers_bot)

    response_bot = session_bot.get(url, timeout=30)
    print(f"Bot Status: {response_bot.status_code}")
    print(f"Bot Content length: {len(response_bot.text)}")

    if response_bot.status_code == 200:
        with open('instagram_bot.html', 'w', encoding='utf-8') as f:
            f.write(response_bot.text)
        print("✅ Contenu bot sauvegardé")
        _print_meta_info(response_bot.text, "Bot")


def _analyze_saved_files():
    """Test 5: grep the files saved by the previous tests for duration/caption patterns."""
    print("\n📊 Test 5: Analyse des fichiers sauvegardés")

    files_to_check = ['instagram_mobile.html', 'instagram_api.json',
                      'instagram_graphql.json', 'instagram_bot.html']

    # Patterns are loop-invariant: build the list once, not per file.
    patterns = [
        r'"duration":\s*"?(\d+)"?',
        r'"video_duration":\s*"?(\d+)"?',
        r'"media_duration":\s*"?(\d+)"?',
        r'"caption":\s*"([^"]*)"',
        r'"text":\s*"([^"]*)"',
        r'"description":\s*"([^"]*)"',
        r'"title":\s*"([^"]*)"'
    ]

    for filename in files_to_check:
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                content = f.read()

            # BUG FIX: these three messages printed the literal text
            # "(unknown)" instead of interpolating the file name.
            print(f"\n🔍 Analyse de {filename}:")

            for pattern in patterns:
                matches = re.findall(pattern, content, re.IGNORECASE | re.DOTALL)
                if matches:
                    print(f"  ✅ Pattern '{pattern}' trouvé: {matches[0][:100]}...")

        except FileNotFoundError:
            print(f"  ❌ Fichier {filename} non trouvé")
        except Exception as e:
            print(f"  ❌ Erreur lecture {filename}: {e}")


def test_instagram_api():
    """Probe several approaches for extracting Instagram reel metadata.

    Runs five tests against a hard-coded reel URL — mobile host, public
    media-info API, GraphQL endpoint, Googlebot user-agent, then an offline
    regex scan of the responses saved to the current directory. All results
    are reported via print(); any unexpected failure is printed with a
    traceback rather than raised.
    """
    # Hard-coded test target.
    url = "https://www.instagram.com/reel/DMpc37-tJdF/"

    print(f"🔍 Test différentes approches Instagram: {url}")

    # Headers simulating a mobile (iPhone Safari) browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive'
    }

    session = requests.Session()
    session.headers.update(headers)

    try:
        _test_mobile_page(session, url)

        # Shortcode is the path segment right after /reel/.
        shortcode = url.split('/reel/')[1].split('/')[0]

        _test_public_api(session, url, headers, shortcode)
        _test_graphql(session, url, headers, shortcode)
        _test_googlebot(url)
        _analyze_saved_files()

    except Exception as e:
        print(f"❌ Erreur générale: {e}")
        import traceback
        traceback.print_exc()

# Script entry point: run all Instagram probes when executed directly.
if __name__ == "__main__":
    test_instagram_api()