# Send URL batches from deployment_logs/ text files to the Colinkri crawler API.
import os
|
|
import re
|
|
import random
|
|
import requests
|
|
from urllib.parse import quote
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables
|
|
load_dotenv()
|
|
|
|
|
|
def process_colinkri_urls(dripfeed=7):
    """
    Process URL files and send them to Colinkri API.

    Scans ``deployment_logs/`` for files named ``YYYY-MM-DD_other_tiers_urls.txt``,
    reads their URLs, shuffles them, and POSTs each file as one campaign to the
    Colinkri API. Processed files are moved to ``Done/`` on success or to
    ``Failed/`` (with an ``*_error.log``) on failure; duplicate destination
    names get a ``_1``, ``_2``, ... suffix.

    Args:
        dripfeed (int): Number of days for drip feed. Default is 7.

    Returns:
        dict: Summary of processed, successful, and failed files, with keys
        ``'processed'``, ``'successful'`` and ``'failed'``.

    Raises:
        ValueError: If COLINKRI_API_KEY is not set in the environment.
    """
    api_key = os.getenv('COLINKRI_API_KEY')
    if not api_key:
        raise ValueError("COLINKRI_API_KEY not found in environment variables")

    # Setup directories (created on demand so a fresh checkout works).
    base_dir = Path('deployment_logs')
    done_dir = base_dir / 'Done'
    failed_dir = base_dir / 'Failed'
    done_dir.mkdir(parents=True, exist_ok=True)
    failed_dir.mkdir(parents=True, exist_ok=True)

    # Pattern to match files: YYYY-MM-DD_other_tiers_urls.txt
    pattern = re.compile(r'^\d{4}-\d{2}-\d{2}_other_tiers_urls\.txt$')
    matching_files = [f for f in base_dir.iterdir()
                      if f.is_file() and pattern.match(f.name)]

    if not matching_files:
        print("No matching files found.")
        return {'processed': 0, 'successful': 0, 'failed': 0}

    results = {'processed': 0, 'successful': 0, 'failed': 0}

    for file_path in matching_files:
        results['processed'] += 1
        campaign_name = file_path.stem  # Filename without .txt

        print(f"\nProcessing: {file_path.name}")

        try:
            # Read URLs from file, skipping blank lines.
            with open(file_path, 'r', encoding='utf-8') as f:
                urls = [line.strip() for line in f if line.strip()]

            if not urls:
                print(f"  ⚠️ No URLs found in {file_path.name}")
                _move_to_failed(file_path, failed_dir)
                results['failed'] += 1
                continue

            # Randomize URL order before submission.
            random.shuffle(urls)

            # The API expects the URL list pipe-separated in a single field.
            # NOTE: requests form-encodes `data` itself, so no manual quoting
            # (urllib.parse.quote) is needed here.
            data = {
                'apikey': api_key,
                'campaignname': campaign_name,
                'dripfeed': str(dripfeed),
                'urls': '|'.join(urls),
            }
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded'
            }

            print(f"  📤 Sending {len(urls)} URLs to Colinkri API...")
            response = requests.post(
                'https://www.colinkri.com/amember/crawler/api',
                data=data, headers=headers, timeout=30)

            if response.status_code == 200:
                print(f"  ✅ Success! Campaign: {campaign_name}")
                file_path.rename(_unique_destination(done_dir, file_path))
                results['successful'] += 1
            else:
                error_msg = f"API returned status code {response.status_code}: {response.text}"
                print(f"  ❌ Failed: {error_msg}")
                _move_to_failed(
                    file_path, failed_dir,
                    f"Error processing {file_path.name}\n"
                    f"Status Code: {response.status_code}\n"
                    f"Response: {response.text}\n")
                results['failed'] += 1

        except Exception as e:
            # Boundary catch: one bad file must not abort the whole batch.
            print(f"  ❌ Error: {str(e)}")
            _move_to_failed(
                file_path, failed_dir,
                f"Error processing {file_path.name}\n"
                f"Exception: {str(e)}\n")
            results['failed'] += 1

    # Print summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    print(f"Files processed: {results['processed']}")
    print(f"Successful: {results['successful']}")
    print(f"Failed: {results['failed']}")
    print("=" * 50)

    return results


def _unique_destination(directory, file_path):
    """Return a collision-free path in *directory* for *file_path*'s name.

    Appends ``_1``, ``_2``, ... before the suffix until the name is unused.
    """
    destination = directory / file_path.name
    counter = 1
    while destination.exists():
        destination = directory / f"{file_path.stem}_{counter}{file_path.suffix}"
        counter += 1
    return destination


def _move_to_failed(file_path, failed_dir, error_text=None):
    """Move *file_path* into *failed_dir* under a collision-free name.

    If *error_text* is given, it is first written to
    ``<destination stem>_error.log`` alongside the moved file.
    """
    destination = _unique_destination(failed_dir, file_path)
    if error_text is not None:
        error_log = failed_dir / f"{destination.stem}_error.log"
        with open(error_log, 'w', encoding='utf-8') as f:
            f.write(error_text)
    file_path.rename(destination)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: submit pending URL files with the default
    # 7-day drip feed.
    process_colinkri_urls(dripfeed=7)