#!/usr/bin/env python3
"""
Bot de détection des posts Instagram promotionnels
"""

import json
import logging
import os
import re
import sqlite3
import time
from contextlib import closing
from datetime import datetime

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Logging setup: INFO level, each record prefixed with its timestamp and severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class InstagramBot:
    """Detect promotional Instagram posts and record the verdicts in SQLite.

    A post's description is scraped with a headless Chrome session (started
    lazily on the first scrape), matched against a keyword list, and stored
    in the ``posts`` table. Instances can be used as context managers so the
    browser is always shut down::

        with InstagramBot() as bot:
            bot.process_post(url)
    """

    # Captures the shortcode of /p/, /reel/, /reels/ and /tv/ links, with an
    # optional leading username segment. "?" and "#" are excluded from the
    # capture so tracking parameters never become part of the post ID;
    # otherwise the same post shared twice would be stored as two posts.
    _POST_ID_RE = re.compile(
        r'instagram\.com/(?:[^/?#]+/)?(?:p|reels?|tv)/([^/?#]+)'
    )

    # Keywords used when the ``keywords`` table holds no usable entry.
    _DEFAULT_KEYWORDS = ("sponsor", "sponsorisé", "partenariat", "pub", "promotion")

    def __init__(self, db_path="instagram_posts.db"):
        """Create the bot and make sure the SQLite schema exists.

        Args:
            db_path: Path of the SQLite database file.

        Raises:
            sqlite3.Error: If the schema cannot be created.
        """
        self.db_path = db_path
        # The browser is only started when a post actually has to be scraped.
        self.driver = None
        self.setup_database()

    def __enter__(self):
        """Return the bot itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down; exceptions from the body are not suppressed."""
        self.close()
        return False

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``with sqlite3.connect(...)`` alone only commits or rolls back; it
        never closes the connection, so every call would leak a handle.
        """
        return closing(sqlite3.connect(self.db_path))

    def setup_database(self):
        """Create the ``posts`` and ``keywords`` tables if they do not exist.

        Raises:
            sqlite3.Error: Logged, then re-raised, when the schema cannot be
                created.
        """
        try:
            # Outer manager closes the connection, inner one commits/rolls back.
            with self._connect() as conn, conn:
                # Table holding the processed Instagram posts.
                conn.execute('''
                CREATE TABLE IF NOT EXISTS posts (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    post_id TEXT UNIQUE,
                    url TEXT,
                    description TEXT,
                    is_promo INTEGER,
                    date_added TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
                ''')

                # Table holding the detection keywords.
                conn.execute('''
                CREATE TABLE IF NOT EXISTS keywords (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    word TEXT UNIQUE,
                    is_active INTEGER DEFAULT 1
                )
                ''')
        except sqlite3.Error as e:
            logger.error("❌ Erreur lors de la création de la base de données: %s", e)
            raise

        logger.info("✅ Base de données initialisée")

    def setup_driver(self):
        """Build and return a headless Chrome WebDriver.

        Returns:
            A started ``webdriver.Chrome`` instance; the caller owns it and is
            responsible for calling ``quit()``.
        """
        chrome_options = Options()
        arguments = (
            '--headless',
            '--no-sandbox',
            '--disable-dev-shm-usage',
            # One profile directory per process so parallel runs do not clash.
            f'--user-data-dir=/tmp/chrome-data-{os.getpid()}',
            '--disable-blink-features=AutomationControlled',
            '--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        )
        for argument in arguments:
            chrome_options.add_argument(argument)

        # Experimental options that make the automation less conspicuous.
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        chrome_options.add_experimental_option('useAutomationExtension', False)

        # Imported here so the module can be loaded (e.g. for the database
        # helpers) without webdriver_manager being installed.
        from webdriver_manager.chrome import ChromeDriverManager
        from selenium.webdriver.chrome.service import Service

        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)
        try:
            # execute_script() would only patch the initial blank page and be
            # lost on the first navigation; registering the script through
            # CDP re-applies it to every new document.
            driver.execute_cdp_cmd(
                'Page.addScriptToEvaluateOnNewDocument',
                {'source': "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
            )
        except Exception:
            # Do not leave an orphaned browser behind if setup fails half-way.
            driver.quit()
            raise

        return driver

    def get_post_id(self, url):
        """Extract the post shortcode from an Instagram URL.

        Args:
            url: Post, reel or TV link; query string and fragment are ignored.

        Returns:
            The shortcode, or ``None`` when ``url`` is not a string or is not
            a recognised Instagram post link.
        """
        if not isinstance(url, str):
            return None
        match = self._POST_ID_RE.search(url)
        return match.group(1) if match else None

    def is_post_processed(self, post_id):
        """Tell whether ``post_id`` is already stored in the ``posts`` table.

        Returns:
            ``True`` if a row exists. ``False`` otherwise, including when the
            lookup itself fails (the error is logged).
        """
        try:
            with self._connect() as conn, conn:
                row = conn.execute(
                    "SELECT id FROM posts WHERE post_id = ?", (post_id,)
                ).fetchone()
                return row is not None
        except sqlite3.Error as e:
            logger.error("❌ Erreur lors de la vérification du post: %s", e)
            return False

    def get_post_bio(self, url):
        """Load ``url`` in the browser and return the post's description text.

        Several CSS selectors are tried in order and the first one yielding
        non-blank text wins. The browser is started on first use.

        Args:
            url: Address of the Instagram post to open.

        Returns:
            The stripped description, or ``None`` when no selector produced
            text or the page could not be loaded.
        """
        if not self.driver:
            self.driver = self.setup_driver()

        try:
            logger.info("🔍 Recherche de la bio: %s", url)

            self.driver.get(url)
            time.sleep(3)  # Give the page a moment to start rendering.

            wait = WebDriverWait(self.driver, 10)

            def located(css_selector):
                """Wait for the first element matching ``css_selector``."""
                return wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
                )

            # Candidate sources for the description, tried in this order.
            extractors = [
                # 1: Open Graph meta description.
                lambda: located('meta[property="og:description"]').get_attribute('content'),
                # 2: full text of the article element.
                lambda: located('article').text,
                # 3: first span inside the article.
                lambda: located('article span').text,
                # 4: element carrying the menuitem role.
                lambda: located('[role="menuitem"]').text,
                # 5: Instagram-specific class names.
                lambda: located('span._aacl._aaco._aacu._aacx._aad7._aade').text,
            ]

            for extract in extractors:
                try:
                    candidate = extract()
                except TimeoutException:
                    continue
                except Exception as e:
                    logger.debug("Erreur avec une méthode: %s", e)
                    continue

                # Only accept real text: a blank result from one selector must
                # not be returned just because the later selectors timed out.
                if candidate and candidate.strip():
                    logger.info("✅ Bio trouvée: %s...", candidate[:100])
                    return candidate.strip()

            logger.warning("❌ Bio non trouvée")
            return None

        except Exception as e:
            logger.error("❌ Erreur lors de la récupération de la bio: %s", e)
            return None

    def get_keywords(self):
        """Return the active keywords, or an empty list if the query fails."""
        try:
            with self._connect() as conn, conn:
                rows = conn.execute(
                    "SELECT word FROM keywords WHERE is_active = 1"
                ).fetchall()
                return [row[0] for row in rows]
        except sqlite3.Error as e:
            logger.error("❌ Erreur lors de la récupération des mots-clés: %s", e)
            return []

    def is_promo_content(self, description):
        """Decide whether ``description`` looks promotional.

        The text is promotional when it contains, case-insensitively, any
        active keyword as a substring. The built-in default list is used when
        the database provides no usable keyword.

        Returns:
            ``True`` on the first keyword hit, ``False`` otherwise or when
            ``description`` is empty.
        """
        if not description:
            return False

        # A blank keyword is a substring of every text and would flag every
        # post as promotional, so such rows are ignored.
        keywords = [kw for kw in self.get_keywords() if kw and kw.strip()]
        if not keywords:
            keywords = self._DEFAULT_KEYWORDS

        haystack = description.lower()
        for keyword in keywords:
            if keyword.lower() in haystack:
                logger.info("✅ Mot-clé trouvé: %s", keyword)
                return True

        return False

    def add_keyword(self, word):
        """Store a new detection keyword.

        Blank or non-string keywords are rejected, and a keyword that already
        exists is left untouched. Database errors are logged, not raised.
        """
        if not isinstance(word, str) or not word.strip():
            logger.warning("⚠️ Mot-clé vide ignoré")
            return

        try:
            with self._connect() as conn, conn:
                cursor = conn.execute(
                    "INSERT OR IGNORE INTO keywords (word) VALUES (?)", (word,)
                )
                # rowcount is 0 when the UNIQUE constraint made SQLite skip
                # the insert.
                inserted = cursor.rowcount > 0
        except sqlite3.Error as e:
            logger.error("❌ Erreur lors de l'ajout du mot-clé: %s", e)
            return

        if inserted:
            logger.info("✅ Mot-clé ajouté: %s", word)
        else:
            logger.info("ℹ️ Mot-clé déjà présent: %s", word)

    def save_post(self, url, post_id, description, is_promo):
        """Insert one processed post into the ``posts`` table.

        Database errors (including a duplicate ``post_id``) are logged and
        swallowed; nothing is returned.
        """
        try:
            with self._connect() as conn, conn:
                conn.execute("""
                INSERT INTO posts (post_id, url, description, is_promo)
                VALUES (?, ?, ?, ?)
                """, (post_id, url, description, 1 if is_promo else 0))
        except sqlite3.Error as e:
            logger.error("❌ Erreur lors de la sauvegarde du post: %s", e)
            return

        logger.info("✅ Post sauvegardé en base de données")

    def process_post(self, url):
        """Scrape, classify and store a single Instagram post.

        Returns:
            A dict with the keys ``url``, ``post_id``, ``description`` and
            ``is_promo``, or ``None`` when the URL is invalid, the post was
            already processed, or its description could not be fetched.
        """
        # 1. Extract the post ID.
        post_id = self.get_post_id(url)
        if not post_id:
            logger.error("❌ URL Instagram invalide")
            return None

        # 2. Skip posts that are already stored.
        if self.is_post_processed(post_id):
            logger.info("ℹ️ Post déjà traité: %s", post_id)
            return None

        # 3. Fetch the description.
        bio = self.get_post_bio(url)
        if not bio:
            logger.error("❌ Impossible de récupérer la bio")
            return None

        # 4. Classify the content.
        is_promo = self.is_promo_content(bio)

        # 5. Persist the result.
        self.save_post(url, post_id, bio, is_promo)

        return {
            'url': url,
            'post_id': post_id,
            'description': bio,
            'is_promo': is_promo
        }

    def close(self):
        """Quit the Selenium driver if one was started; safe to call twice."""
        if self.driver:
            try:
                self.driver.quit()
            finally:
                # Forget the driver even if quit() fails so that a later call
                # does not retry on a dead session.
                self.driver = None

def main():
    """Run the bot over a fixed list of URLs and print each result as JSON."""
    bot = InstagramBot()

    # Example usage: append further post or reel URLs to process here.
    targets = [
        "https://www.instagram.com/reel/DMpc37-tJdF/",
    ]

    try:
        # map() is lazy, so each post is processed right before it is printed.
        for outcome in map(bot.process_post, targets):
            if not outcome:
                continue
            print("\n📊 Résultat:")
            print(json.dumps(outcome, indent=2, ensure_ascii=False))
    finally:
        # Always release the browser, even if processing raised.
        bot.close()


if __name__ == "__main__":
    main()
