import os
import random
import re
from pathlib import Path

import requests
from dotenv import load_dotenv

# Load environment variables from a local .env file (provides COLINKRI_API_KEY).
load_dotenv()

# Colinkri campaign-submission endpoint.
API_URL = 'https://www.colinkri.com/amember/crawler/api'


def _unique_destination(directory: Path, file_path: Path) -> Path:
    """Return a collision-free path in *directory* for *file_path*'s name.

    If the name is already taken, append a numeric suffix (_1, _2, ...)
    until an unused name is found.
    """
    destination = directory / file_path.name
    counter = 1
    while destination.exists():
        destination = directory / f"{file_path.stem}_{counter}{file_path.suffix}"
        counter += 1
    return destination


def _fail_file(file_path: Path, failed_dir: Path, error_lines=None) -> None:
    """Move *file_path* into *failed_dir* (collision-safe).

    When *error_lines* is given, first write a sibling ``<name>_error.log``
    in *failed_dir* describing the failure; the log is written before the
    rename so a crash mid-way never loses the source file silently.
    """
    destination = _unique_destination(failed_dir, file_path)
    if error_lines:
        error_log = failed_dir / f"{destination.stem}_error.log"
        with open(error_log, 'w', encoding='utf-8') as f:
            f.write(f"Error processing {file_path.name}\n")
            for line in error_lines:
                f.write(line + "\n")
    file_path.rename(destination)


def process_colinkri_urls(dripfeed=7):
    """
    Process URL files and send them to Colinkri API.

    Scans ``deployment_logs/`` for files named
    ``YYYY-MM-DD_other_tiers_urls.txt``. Each file's URLs are shuffled,
    joined with ``|`` and POSTed to the Colinkri campaign API (campaign
    name = filename without extension). Successfully submitted files move
    to ``deployment_logs/Done/``; empty or failed files move to
    ``deployment_logs/Failed/`` with an ``*_error.log`` where applicable.

    Args:
        dripfeed (int): Number of days for drip feed. Default is 7.

    Returns:
        dict: Summary of processed, successful, and failed files
            (keys: 'processed', 'successful', 'failed').

    Raises:
        ValueError: If COLINKRI_API_KEY is not set in the environment.
    """
    api_key = os.getenv('COLINKRI_API_KEY')
    if not api_key:
        raise ValueError("COLINKRI_API_KEY not found in environment variables")

    # Setup directories; create Done/Failed if they don't exist.
    base_dir = Path('deployment_logs')
    done_dir = base_dir / 'Done'
    failed_dir = base_dir / 'Failed'
    done_dir.mkdir(parents=True, exist_ok=True)
    failed_dir.mkdir(parents=True, exist_ok=True)

    # Pattern to match files: YYYY-MM-DD_other_tiers_urls.txt
    pattern = re.compile(r'^\d{4}-\d{2}-\d{2}_other_tiers_urls\.txt$')
    matching_files = [f for f in base_dir.iterdir()
                      if f.is_file() and pattern.match(f.name)]

    if not matching_files:
        print("No matching files found.")
        return {'processed': 0, 'successful': 0, 'failed': 0}

    results = {'processed': 0, 'successful': 0, 'failed': 0}

    for file_path in matching_files:
        results['processed'] += 1
        campaign_name = file_path.stem  # Filename without .txt
        print(f"\nProcessing: {file_path.name}")

        try:
            # Read URLs from file, skipping blank lines.
            with open(file_path, 'r', encoding='utf-8') as f:
                urls = [line.strip() for line in f if line.strip()]

            if not urls:
                print(f" ⚠️ No URLs found in {file_path.name}")
                _fail_file(file_path, failed_dir)
                results['failed'] += 1
                continue

            # Randomize URL order before submission.
            random.shuffle(urls)

            data = {
                'apikey': api_key,
                'campaignname': campaign_name,
                'dripfeed': str(dripfeed),
                'urls': '|'.join(urls),  # API expects pipe-separated URLs
            }
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}

            print(f" 📤 Sending {len(urls)} URLs to Colinkri API...")
            response = requests.post(API_URL, data=data, headers=headers,
                                     timeout=30)

            if response.status_code == 200:
                print(f" ✅ Success! Campaign: {campaign_name}")
                file_path.rename(_unique_destination(done_dir, file_path))
                results['successful'] += 1
            else:
                error_msg = (f"API returned status code "
                             f"{response.status_code}: {response.text}")
                print(f" ❌ Failed: {error_msg}")
                _fail_file(file_path, failed_dir, [
                    f"Status Code: {response.status_code}",
                    f"Response: {response.text}",
                ])
                results['failed'] += 1
        except Exception as e:
            # Batch boundary: any unexpected error (I/O, network, rename)
            # quarantines this file with a log instead of aborting the run.
            print(f" ❌ Error: {str(e)}")
            _fail_file(file_path, failed_dir, [f"Exception: {str(e)}"])
            results['failed'] += 1

    # Print summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    print(f"Files processed: {results['processed']}")
    print(f"Successful: {results['successful']}")
    print(f"Failed: {results['failed']}")
    print("=" * 50)
    return results


if __name__ == '__main__':
    # Example usage
    process_colinkri_urls(dripfeed=7)