#!/usr/bin/env python3
"""
Test de téléchargement d'une vidéo Instagram via l'API GraphQL publique
"""

import subprocess
import json
import logging
import os
import time
import re
import random
from urllib.parse import urlencode

# Logging setup: configure the root logger at DEBUG level (this runs as a side
# effect of importing the module) and create the module-level logger that
# every function below writes to.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def generate_uuid():
    """Return a random UUID-shaped string used to simulate a session.

    The result is 32 lowercase hexadecimal digits drawn with ``random.choice``
    and laid out in the usual 8-4-4-4-12 grouping. No UUID version or variant
    bits are set; only the textual shape matches a UUID.
    """
    alphabet = '0123456789abcdef'
    digits = [random.choice(alphabet) for _ in range(32)]
    raw = ''.join(digits)
    # Offsets delimiting the five groups of the 8-4-4-4-12 layout.
    cuts = (0, 8, 12, 16, 20, 32)
    return '-'.join(raw[start:end] for start, end in zip(cuts, cuts[1:]))

def extract_post_id(url):
    """Extract the post shortcode from an Instagram URL.

    Recognises ``/p/<code>``, ``/reel/<code>``, ``/reels/<code>`` and
    ``/tv/<code>`` links. The shortcode stops at the first ``/``, ``?``, ``#``
    or ``&`` so that query strings and fragments (for example
    ``?igshid=...``) never end up inside the returned identifier.

    Args:
        url: Instagram post URL. Anything that is not a string is treated as
            an invalid URL.

    Returns:
        The shortcode as a string, or None when the URL matches no known
        post pattern.
    """
    if not isinstance(url, str):
        return None
    # ``reels/audio/<id>`` is an audio page, not a post, hence the lookahead.
    match = re.search(
        r'instagram\.com/(?:p|tv|reels?(?!/audio/))/([^/?#&]+)',
        url,
    )
    return match.group(1) if match else None

def get_instagram_session():
    """Fetch the Instagram home page with curl and build a session description.

    The request stores the cookies it receives in ``instagram_cookies.txt``
    (relative to the current working directory). The returned HTML is then
    scanned for the CSRF token and the web application ID.

    Returns:
        A dict with the keys ``cookies_file``, ``csrf_token``, ``app_id``,
        ``device_id`` and ``user_agent`` on success. None when curl cannot be
        started, exits with a non-zero status (including the 30 s time limit),
        or when the page does not contain both tokens.
    """
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    cookies_file = 'instagram_cookies.txt'

    # Browser-like navigation headers, in the order they are sent.
    # Accept-Encoding is deliberately absent: curl runs without --compressed
    # here, so the body must arrive as plain text for the regexes below.
    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Sec-Ch-Ua': '"Not A(Brand";v="24", "Chromium";v="121"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Dnt': '1',
        'Upgrade-Insecure-Requests': '1',
    }

    # First request: initialises the cookie jar and returns the HTML page.
    init_cmd = ['curl', 'https://www.instagram.com/']
    for name, value in headers.items():
        init_cmd += ['-H', f'{name}: {value}']
    init_cmd += [
        '-c', cookies_file,
        # Bound the whole transfer so a stalled connection cannot hang forever.
        '--max-time', '30',
        # -s hides the progress meter; -S keeps curl's error message on
        # stderr so the failure log below is not empty.
        '-s', '-S',
    ]

    logger.debug("Initialisation de la session...")
    try:
        completed = subprocess.run(
            init_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Typically: the curl executable is not installed or not on PATH.
        logger.error(f"Erreur lors de l'initialisation: {exc}")
        return None

    if completed.returncode != 0:
        error_text = completed.stderr.decode(errors='replace')
        logger.error(f"Erreur lors de l'initialisation: {error_text}")
        return None

    # Tolerate stray non-UTF-8 bytes: only two ASCII tokens are needed.
    html_content = completed.stdout.decode(errors='replace')

    # Extract the CSRF token and the app ID embedded in the page.
    csrf_match = re.search(r'"csrf_token":"([^"]+)"', html_content)
    appid_match = re.search(r'"appId":"(\d+)"', html_content)

    if not csrf_match or not appid_match:
        logger.error("Impossible de trouver les tokens nécessaires")
        return None

    return {
        'cookies_file': cookies_file,
        'csrf_token': csrf_match.group(1),
        'app_id': appid_match.group(1),
        # Random UUID-shaped identifier generated for this session.
        'device_id': generate_uuid(),
        'user_agent': user_agent,
    }

def _curl_header_args(headers):
    """Flatten a header mapping into curl ``-H 'Name: value'`` arguments."""
    args = []
    for name, value in headers.items():
        args += ['-H', f'{name}: {value}']
    return args


def _run_curl(cmd):
    """Run a curl command and return ``(ok, stdout_bytes, error_text)``.

    ``ok`` is True only when curl could be started and exited with status 0.
    """
    try:
        completed = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Typically: the curl executable is not installed or not on PATH.
        return False, b'', str(exc)
    error_text = completed.stderr.decode(errors='replace')
    return completed.returncode == 0, completed.stdout, error_text


def _fetch_video_url(post_id, session, referer):
    """Query the GraphQL endpoint for a post; return its video URL or None."""
    headers = {
        'Authority': 'www.instagram.com',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.instagram.com',
        'Referer': referer,
        'Sec-Ch-Ua': '"Not A(Brand";v="24", "Chromium";v="121"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': session['user_agent'],
        'X-Asbd-Id': '129477',
        'X-Csrftoken': session['csrf_token'],
        'X-Ig-App-Id': session['app_id'],
        'X-Ig-Www-Claim': '0',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # GraphQL variables; doc_id is sent both inside them and as its own
    # query parameter.
    variables = {
        "shortcode": post_id,
        "__typename": "GraphVideo",
        "doc_id": "3217533874971376",
    }
    params = {
        "doc_id": variables["doc_id"],
        "variables": json.dumps(variables),
    }
    api_url = f"https://www.instagram.com/graphql/query/?{urlencode(params)}"

    logger.info("Récupération des informations via GraphQL...")
    api_cmd = [
        'curl',
        api_url,
        *_curl_header_args(headers),
        '-b', session['cookies_file'],
        '--compressed',
        # Bound the whole request so a stalled connection cannot hang forever.
        '--max-time', '30',
        # -S keeps curl's error message on stderr despite -s.
        '-s', '-S',
    ]
    logger.debug(f"Requête GraphQL: {api_url}")

    ok, body, error_text = _run_curl(api_cmd)
    if not ok:
        logger.error(f"❌ Erreur lors de la requête GraphQL: {error_text}")
        return None

    try:
        response = json.loads(body)
    except ValueError:
        # Covers json.JSONDecodeError and the UnicodeDecodeError raised for
        # bodies that are not valid UTF-8 (both are ValueError subclasses).
        logger.error("❌ Erreur de décodage JSON de la réponse GraphQL")
        logger.error(f"Réponse reçue: {body.decode(errors='replace')}")
        return None
    logger.debug(f"Réponse GraphQL: {json.dumps(response, indent=2)}")

    # Walk the payload defensively: "data" or "shortcode_media" may be null
    # or missing, and the top level is not guaranteed to be an object.
    data = response.get('data') if isinstance(response, dict) else None
    video_data = data.get('shortcode_media') if isinstance(data, dict) else None
    if not isinstance(video_data, dict) or not video_data:
        logger.error("❌ Pas de données média dans la réponse")
        return None

    if not video_data.get('is_video', False):
        logger.error("❌ Ce post n'est pas une vidéo")
        return None

    video_url = video_data.get('video_url')
    # The value is placed on curl's command line, so require an http(s) URL
    # (a non-string or a value starting with "-" must never reach argv).
    if not isinstance(video_url, str) or not video_url.startswith(('http://', 'https://')):
        logger.error("❌ URL de la vidéo non trouvée")
        return None
    return video_url


def _download_video_file(video_url, output_file, session, referer):
    """Download ``video_url`` to ``output_file`` with curl; return success."""
    download_cmd = [
        'curl',
        video_url,
        '-L',
        '-o', output_file,
        '-H', f'User-Agent: {session["user_agent"]}',
        '-H', 'Accept: */*',
        '-H', f'Referer: {referer}',
        '-b', session['cookies_file'],
        '--compressed',
        # Without --fail curl exits 0 on HTTP 4xx/5xx and the error page
        # would be saved as the "video".
        '--fail',
        # Limit only the connection phase; the transfer itself may be long.
        '--connect-timeout', '30',
        '-s', '-S',
    ]

    ok, _, error_text = _run_curl(download_cmd)
    if ok and os.path.exists(output_file):
        return True

    logger.error(f"❌ Erreur lors du téléchargement: {error_text}")
    # Do not leave a partial or empty file behind after a failed transfer.
    if os.path.exists(output_file):
        os.remove(output_file)
    return False


def download_instagram_video(url, output_dir="downloads"):
    """Download the video of an Instagram post through the GraphQL endpoint.

    Steps: create ``output_dir`` if needed, extract the shortcode from
    ``url``, open a session with ``get_instagram_session``, ask the GraphQL
    endpoint for the post's ``video_url`` and download it with curl. The
    cookie file written by the session step is removed before returning.

    Args:
        url: Instagram post URL (as accepted by ``extract_post_id``).
        output_dir: Directory receiving the file; created when missing.

    Returns:
        Path of the downloaded ``video_<unix-timestamp>.mp4`` file, or None
        on any failure (invalid URL, no session, request or HTTP error,
        post without video, download error). Failures are logged.
    """
    logger.info(f"🔄 Téléchargement depuis Instagram: {url}")

    try:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        logger.error(f"❌ Erreur: {str(e)}")
        return None

    post_id = extract_post_id(url)
    if not post_id:
        logger.error("❌ URL Instagram invalide")
        return None

    try:
        session = get_instagram_session()
        if not session:
            logger.error("❌ Impossible d'obtenir une session valide")
            return None

        # The /reel/ form is used as Referer for every kind of post.
        referer = f'https://www.instagram.com/reel/{post_id}/'

        video_url = _fetch_video_url(post_id, session, referer)
        if not video_url:
            return None

        output_file = os.path.join(output_dir, f"video_{int(time.time())}.mp4")
        logger.info("🔄 Téléchargement de la vidéo...")
        if not _download_video_file(video_url, output_file, session, referer):
            return None

        logger.info(f"✅ Vidéo téléchargée: {output_file}")
        return output_file

    except Exception as e:
        # Top-level boundary: callers expect None rather than an exception.
        logger.error(f"❌ Erreur: {str(e)}")
        return None
    finally:
        # Remove the temporary cookie jar written by get_instagram_session.
        try:
            os.remove('instagram_cookies.txt')
        except FileNotFoundError:
            pass

if __name__ == "__main__":
    url = "https://www.instagram.com/reel/C2_Kh6mPmGl/"
    result = download_instagram_video(url)
    
    if result:
        print(f"\n✅ Vidéo téléchargée avec succès: {result}")
    else:
        print("\n❌ Échec du téléchargement")
