local db works with limited reporting
commit 3b99e345ed
Binary file not shown.

main.py
@@ -0,0 +1,502 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from typing import List, Optional
from urllib.parse import urlparse
import sqlite3
import json


app = FastAPI(title="Link Tracker API", version="1.0.0")

# Enable CORS for your Chrome extension
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, restrict this to specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Database setup
DB_PATH = "link_tracker.db"


def init_db():
    """Initialize the database with required tables"""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    # Pages table - stores captured page information
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS pages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT UNIQUE NOT NULL,
            title TEXT NOT NULL,
            keywords TEXT,          -- JSON array of keywords
            timestamp DATETIME NOT NULL,
            detected_clients TEXT,  -- JSON array of detected clients
            total_links INTEGER NOT NULL,
            linked_to TEXT,         -- JSON array of client URLs this page links to
            colinkiri BOOLEAN DEFAULT FALSE,
            indexer BOOLEAN DEFAULT FALSE,
            t2 BOOLEAN DEFAULT FALSE,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    """)

    # For existing databases, add the new columns if they don't exist
    for alter in (
        "ALTER TABLE pages ADD COLUMN colinkiri BOOLEAN DEFAULT FALSE",
        "ALTER TABLE pages ADD COLUMN indexer BOOLEAN DEFAULT FALSE",
        "ALTER TABLE pages ADD COLUMN t2 BOOLEAN DEFAULT FALSE",
        "ALTER TABLE pages ADD COLUMN linked_to TEXT",
    ):
        try:
            cursor.execute(alter)
        except sqlite3.OperationalError:
            pass  # Column already exists

    # Links table - stores all external links found on pages
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS links (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            page_id INTEGER,
            href TEXT NOT NULL,
            anchor_text TEXT,
            title_attr TEXT,
            domain TEXT NOT NULL,
            is_client_link BOOLEAN DEFAULT FALSE,
            client_domain TEXT,
            client_name TEXT,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (page_id) REFERENCES pages (id)
        )
    """)

    # Create indexes for better query performance
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_pages_url ON pages (url)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_domain ON links (domain)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_client_domain ON links (client_domain)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_page_id ON links (page_id)")

    conn.commit()
    conn.close()


# Pydantic models for API requests
class LinkData(BaseModel):
    href: str
    text: Optional[str] = ""
    title: Optional[str] = ""


class DetectedClient(BaseModel):
    domain: str
    name: str


class PageCaptureRequest(BaseModel):
    url: str
    title: str
    timestamp: str
    keywords: List[str]
    detectedClients: List[DetectedClient]
    totalLinks: int
    links: List[LinkData]
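
# For reference, the JSON body the extension POSTs to /capture-page follows the
# model above; an illustrative example (all values made up):
#
# {
#     "url": "https://example.com/article",
#     "title": "Example Article",
#     "timestamp": "2024-01-01T00:00:00Z",
#     "keywords": ["example", "article"],
#     "detectedClients": [{"domain": "client.com", "name": "Client"}],
#     "totalLinks": 2,
#     "links": [
#         {"href": "https://client.com/page", "text": "anchor text", "title": ""},
#         {"href": "https://other.com/", "text": "other link", "title": ""}
#     ]
# }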


# API Response models
class PageSummary(BaseModel):
    id: int
    url: str
    title: str
    timestamp: str
    detected_clients: List[str]
    total_links: int
    client_links_count: int


class LinkSummary(BaseModel):
    href: str
    anchor_text: str
    domain: str
    is_client_link: bool
    client_name: Optional[str] = None
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize database on startup"""
|
||||||
|
init_db()
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def root():
|
||||||
|
"""Health check endpoint"""
|
||||||
|
return {"message": "Link Tracker API is running"}
|
||||||
|
|
||||||
|
@app.post("/capture-page")
|
||||||
|
async def capture_page(data: PageCaptureRequest):
|
||||||
|
"""Capture page data and links from Chrome extension"""
|
||||||
|
try:
|
||||||
|
print(f"Received data: {data}") # Debug logging
|
||||||
|
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# Check if page already exists
|
||||||
|
cursor.execute("SELECT id FROM pages WHERE url = ?", (data.url,))
|
||||||
|
existing_page = cursor.fetchone()
|
||||||
|
|
||||||
|
# Get client domains for faster lookup
|
||||||
|
client_domains = {c.domain: c.name for c in data.detectedClients}
|
||||||
|
|
||||||
|
# Collect client URLs for the linked_to field
|
||||||
|
client_urls = []
|
||||||
|
|
||||||
|
if existing_page:
|
||||||
|
# Update existing page
|
||||||
|
page_id = existing_page[0]
|
||||||
|
cursor.execute("""
|
||||||
|
UPDATE pages
|
||||||
|
SET title = ?, keywords = ?, timestamp = ?,
|
||||||
|
detected_clients = ?, total_links = ?, linked_to = ?
|
||||||
|
WHERE id = ?
|
||||||
|
""", (
|
||||||
|
data.title,
|
||||||
|
json.dumps(data.keywords),
|
||||||
|
data.timestamp,
|
||||||
|
json.dumps([{"domain": c.domain, "name": c.name} for c in data.detectedClients]),
|
||||||
|
data.totalLinks,
|
||||||
|
json.dumps([]), # Will be populated below
|
||||||
|
page_id
|
||||||
|
))
|
||||||
|
|
||||||
|
# Delete existing links for this page
|
||||||
|
cursor.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
|
||||||
|
else:
|
||||||
|
# Insert new page
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO pages (url, title, keywords, timestamp, detected_clients, total_links, linked_to)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""", (
|
||||||
|
data.url,
|
||||||
|
data.title,
|
||||||
|
json.dumps(data.keywords),
|
||||||
|
data.timestamp,
|
||||||
|
json.dumps([{"domain": c.domain, "name": c.name} for c in data.detectedClients]),
|
||||||
|
data.totalLinks,
|
||||||
|
json.dumps([]) # Will be populated below
|
||||||
|
))
|
||||||
|
page_id = cursor.lastrowid
|
||||||
|
|
||||||
|
# Insert links
|
||||||
|
for link in data.links:
|
||||||
|
try:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed_url = urlparse(link.href)
|
||||||
|
domain = parsed_url.netloc.replace('www.', '')
|
||||||
|
|
||||||
|
# Check if this is a client link
|
||||||
|
is_client_link = domain in client_domains
|
||||||
|
client_name = client_domains.get(domain) if is_client_link else None
|
||||||
|
|
||||||
|
# If it's a client link, add to linked_to array
|
||||||
|
if is_client_link:
|
||||||
|
client_urls.append(link.href)
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO links (page_id, href, anchor_text, title_attr, domain,
|
||||||
|
is_client_link, client_domain, client_name)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""", (
|
||||||
|
page_id,
|
||||||
|
link.href,
|
||||||
|
link.text or "",
|
||||||
|
link.title or "",
|
||||||
|
domain,
|
||||||
|
is_client_link,
|
||||||
|
domain if is_client_link else None,
|
||||||
|
client_name
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error processing link {link.href}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Update the linked_to field with collected client URLs
|
||||||
|
cursor.execute("UPDATE pages SET linked_to = ? WHERE id = ?", (json.dumps(client_urls), page_id))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"message": f"Captured {data.totalLinks} links from {data.url}",
|
||||||
|
"page_id": page_id,
|
||||||
|
"detected_clients": len(data.detectedClients)
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error details: {e}") # Debug logging
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error capturing page data: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/pages", response_model=List[PageSummary])
|
||||||
|
async def get_pages(limit: int = 50, offset: int = 0):
|
||||||
|
"""Get list of captured pages"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT p.id, p.url, p.title, p.timestamp, p.detected_clients, p.total_links,
|
||||||
|
COUNT(l.id) as client_links_count
|
||||||
|
FROM pages p
|
||||||
|
LEFT JOIN links l ON p.id = l.page_id AND l.is_client_link = 1
|
||||||
|
GROUP BY p.id
|
||||||
|
ORDER BY p.created_at DESC
|
||||||
|
LIMIT ? OFFSET ?
|
||||||
|
""", (limit, offset))
|
||||||
|
|
||||||
|
pages = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
detected_clients_data = json.loads(row[4]) if row[4] else []
|
||||||
|
client_names = [c["name"] for c in detected_clients_data]
|
||||||
|
|
||||||
|
pages.append(PageSummary(
|
||||||
|
id=row[0],
|
||||||
|
url=row[1],
|
||||||
|
title=row[2],
|
||||||
|
timestamp=row[3],
|
||||||
|
detected_clients=client_names,
|
||||||
|
total_links=row[5],
|
||||||
|
client_links_count=row[6]
|
||||||
|
))
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
return pages
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error fetching pages: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/pages/{page_id}/links", response_model=List[LinkSummary])
|
||||||
|
async def get_page_links(page_id: int):
|
||||||
|
"""Get all links for a specific page"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT href, anchor_text, domain, is_client_link, client_name
|
||||||
|
FROM links
|
||||||
|
WHERE page_id = ?
|
||||||
|
ORDER BY is_client_link DESC, domain ASC
|
||||||
|
""", (page_id,))
|
||||||
|
|
||||||
|
links = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
links.append(LinkSummary(
|
||||||
|
href=row[0],
|
||||||
|
anchor_text=row[1],
|
||||||
|
domain=row[2],
|
||||||
|
is_client_link=bool(row[3]),
|
||||||
|
client_name=row[4]
|
||||||
|
))
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
return links
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error fetching links: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/clients/{client_domain}/links")
|
||||||
|
async def get_client_links(client_domain: str, limit: int = 100):
|
||||||
|
"""Get all links pointing to a specific client domain"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT l.href, l.anchor_text, p.url as source_page, p.title as source_title,
|
||||||
|
l.client_name, p.timestamp
|
||||||
|
FROM links l
|
||||||
|
JOIN pages p ON l.page_id = p.id
|
||||||
|
WHERE l.client_domain = ?
|
||||||
|
ORDER BY p.timestamp DESC
|
||||||
|
LIMIT ?
|
||||||
|
""", (client_domain, limit))
|
||||||
|
|
||||||
|
links = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
links.append({
|
||||||
|
"target_url": row[0],
|
||||||
|
"anchor_text": row[1],
|
||||||
|
"source_page": row[2],
|
||||||
|
"source_title": row[3],
|
||||||
|
"client_name": row[4],
|
||||||
|
"timestamp": row[5]
|
||||||
|
})
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
return {"client_domain": client_domain, "links": links}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error fetching client links: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/urls/for-colinkiri", response_class=PlainTextResponse)
|
||||||
|
async def get_urls_for_colinkiri():
|
||||||
|
"""Get all URLs where colinkiri=false and mark them as processed"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# Get unprocessed URLs
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT id, url
|
||||||
|
FROM pages
|
||||||
|
WHERE colinkiri = FALSE
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
""")
|
||||||
|
|
||||||
|
page_ids = []
|
||||||
|
url_list = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
page_ids.append(row[0])
|
||||||
|
url_list.append(row[1])
|
||||||
|
|
||||||
|
# Mark them as processed
|
||||||
|
if page_ids:
|
||||||
|
placeholders = ','.join(['?'] * len(page_ids))
|
||||||
|
cursor.execute(f"UPDATE pages SET colinkiri = TRUE WHERE id IN ({placeholders})", page_ids)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
# Return URLs as plain text, one per line
|
||||||
|
return '\n'.join(url_list)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error getting URLs for colinkiri: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/urls/for-indexer", response_class=PlainTextResponse)
|
||||||
|
async def get_urls_for_indexer():
|
||||||
|
"""Get all URLs where indexer=false and mark them as processed"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# Get unprocessed URLs
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT id, url
|
||||||
|
FROM pages
|
||||||
|
WHERE indexer = FALSE
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
""")
|
||||||
|
|
||||||
|
page_ids = []
|
||||||
|
url_list = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
page_ids.append(row[0])
|
||||||
|
url_list.append(row[1])
|
||||||
|
|
||||||
|
# Mark them as processed
|
||||||
|
if page_ids:
|
||||||
|
placeholders = ','.join(['?'] * len(page_ids))
|
||||||
|
cursor.execute(f"UPDATE pages SET indexer = TRUE WHERE id IN ({placeholders})", page_ids)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
# Return URLs as plain text, one per line
|
||||||
|
return '\n'.join(url_list)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error getting URLs for indexer: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/search/linking-to")
|
||||||
|
async def search_pages_linking_to(target_url: str):
|
||||||
|
"""Find all pages that link to a specific URL"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT id, url, title, timestamp, linked_to
|
||||||
|
FROM pages
|
||||||
|
WHERE linked_to LIKE ?
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
""", (f'%{target_url}%',))
|
||||||
|
|
||||||
|
pages = []
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
linked_to = json.loads(row[4]) if row[4] else []
|
||||||
|
# Verify the exact URL is in the linked_to array
|
||||||
|
if target_url in linked_to:
|
||||||
|
pages.append({
|
||||||
|
"id": row[0],
|
||||||
|
"url": row[1],
|
||||||
|
"title": row[2],
|
||||||
|
"timestamp": row[3],
|
||||||
|
"linked_to": linked_to
|
||||||
|
})
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"target_url": target_url,
|
||||||
|
"pages": pages,
|
||||||
|
"count": len(pages)
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error searching for pages linking to URL: {str(e)}")
|
||||||
|
|
||||||
|
@app.get("/stats")
|
||||||
|
async def get_stats():
|
||||||
|
"""Get overall statistics"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(DB_PATH)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# Total pages captured
|
||||||
|
cursor.execute("SELECT COUNT(*) FROM pages")
|
||||||
|
total_pages = cursor.fetchone()[0]
|
||||||
|
|
||||||
|
# Total links captured
|
||||||
|
cursor.execute("SELECT COUNT(*) FROM links")
|
||||||
|
total_links = cursor.fetchone()[0]
|
||||||
|
|
||||||
|
# Total client links
|
||||||
|
cursor.execute("SELECT COUNT(*) FROM links WHERE is_client_link = 1")
|
||||||
|
client_links = cursor.fetchone()[0]
|
||||||
|
|
||||||
|
# Links by client
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT client_name, COUNT(*) as link_count
|
||||||
|
FROM links
|
||||||
|
WHERE is_client_link = 1
|
||||||
|
GROUP BY client_name
|
||||||
|
ORDER BY link_count DESC
|
||||||
|
""")
|
||||||
|
client_stats = [{"client": row[0], "links": row[1]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_pages": total_pages,
|
||||||
|
"total_links": total_links,
|
||||||
|
"client_links": client_links,
|
||||||
|
"other_links": total_links - client_links,
|
||||||
|
"client_breakdown": client_stats
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error fetching stats: {str(e)}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||||
|
|


README.md
@@ -0,0 +1,196 @@
# Link Tracker API

A simple web service that tracks links to your client websites and provides reports for SEO indexing services.

## What This Does

This tool helps you:
- Track which pages link to your client websites
- Generate lists of URLs to submit to indexing services like colinkiri
- Keep track of which URLs you've already submitted

## Installation

### Step 1: Install Python

**On Mac:**
1. Open Terminal (press `Cmd + Space`, type "Terminal", press Enter)
2. Install Homebrew if you don't have it:
   ```bash
   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
   ```
3. Install Python:
   ```bash
   brew install python
   ```

**On Windows:**
1. Go to https://python.org/downloads/
2. Download the latest Python version
3. Run the installer
4. **Important:** Check "Add Python to PATH" during installation
5. Open Command Prompt (press `Win + R`, type "cmd", press Enter)

### Step 2: Download the Project

1. Open Terminal (Mac) or Command Prompt (Windows)
2. Navigate to where you want to install the project:
   ```bash
   cd Desktop
   ```
3. Clone the project from the git server (the extra `link-tracker` argument puts it in a folder matching the commands below):
   ```bash
   git clone https://git.peninsulaindustries.com/bryanb/Link-Tracker-Server.git link-tracker
   cd link-tracker
   ```

### Step 3: Install Required Packages

**On Mac (Terminal):**
```bash
cd ~/Desktop/link-tracker
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```

**On Windows (Command Prompt):**
```bash
cd %USERPROFILE%\Desktop\link-tracker
python -m venv venv
venv\Scripts\activate
pip install -r requirements.txt
```

## Running the Server

**On Mac:**
```bash
cd ~/Desktop/link-tracker
source venv/bin/activate
python main.py
```

**On Windows:**
```bash
cd %USERPROFILE%\Desktop\link-tracker
venv\Scripts\activate
python main.py
```

You should see something like:
```
INFO:     Started server process [12345]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000
```

**The server is now running!** Keep this window open while you use the system.
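
To confirm it's up, you can hit the health-check endpoint from a second terminal window (this assumes you have `curl`; pasting the URL into your browser works too):

```bash
curl http://localhost:8000/
```

You should get back `{"message": "Link Tracker API is running"}`.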

## Getting Your URL Reports

### For Colinkiri Indexing Service

Open your web browser and go to:
```
http://localhost:8000/urls/for-colinkiri
```

This will show you a list of URLs, one per line, that you can copy and paste into colinkiri. **Important:** After you visit this URL, those pages are marked as "submitted" so they won't appear in future reports.
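
If you'd rather save the report to a file, the same endpoint works from the command line (assuming `curl` is installed):

```bash
curl http://localhost:8000/urls/for-colinkiri -o colinkiri-urls.txt
```

Fetching the report this way marks those URLs as submitted, exactly as a browser visit does.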

### For Other Indexing Services

Open your web browser and go to:
```
http://localhost:8000/urls/for-indexer
```

Same as above, but for a different indexing service.
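
The command-line variant works here too:

```bash
curl http://localhost:8000/urls/for-indexer -o indexer-urls.txt
```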

## Updating to the Latest Version

When updates are available, you can easily update the project:

1. Stop the server (press `Ctrl + C` in the terminal)
2. Update the code:
   ```bash
   git pull origin main
   ```
3. Update any new dependencies:
   ```bash
   pip install -r requirements.txt
   ```
4. Restart the server:
   ```bash
   python main.py
   ```

**Note:** Your data (the `link_tracker.db` file) will not be affected by updates.

## Stopping the Server

To stop the server, go back to your Terminal/Command Prompt window and press `Ctrl + C`.

## Troubleshooting

**"Command not found" errors:**
- Make sure Python is installed and added to your PATH
- Try using `python3` instead of `python` on Mac
- Try using `py` instead of `python` on Windows

**"Port already in use" error:**
- Another program is using port 8000
- Try changing the port in the last line of `main.py`: `uvicorn.run(app, host="0.0.0.0", port=8001)`
- Then use `http://localhost:8001` instead of `http://localhost:8000`

**Can't access the URLs:**
- Make sure the server is running (you should see the "Uvicorn running" message)
- Check that you're using the correct URL: `http://localhost:8000`
- Try refreshing your browser

## Other Available Endpoints

These are for advanced users or developers:

### View All Captured Pages
```
http://localhost:8000/pages
```
Shows all pages that have been captured, with link information.
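
The response is a JSON array. Going by the `PageSummary` model in `main.py`, each entry is shaped like this (values made up for illustration):

```
[
  {
    "id": 1,
    "url": "https://example.com/article",
    "title": "Example Article",
    "timestamp": "2024-01-01T00:00:00Z",
    "detected_clients": ["Client Name"],
    "total_links": 42,
    "client_links_count": 3
  }
]
```

The endpoint also accepts `limit` and `offset` query parameters for paging, e.g. `http://localhost:8000/pages?limit=10&offset=20`.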

### View Statistics
```
http://localhost:8000/stats
```
Shows overall statistics about captured pages and links.
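
Based on the `/stats` handler in `main.py`, the response looks like this (numbers invented for illustration):

```
{
  "total_pages": 120,
  "total_links": 8400,
  "client_links": 310,
  "other_links": 8090,
  "client_breakdown": [
    {"client": "Client Name", "links": 55}
  ]
}
```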

### Search for Pages Linking to a Specific URL
```
http://localhost:8000/search/linking-to?target_url=https://example.com
```
Finds all pages that link to a specific URL.
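
The response is JSON with the matching pages and a count, roughly like this (illustrative values):

```
{
  "target_url": "https://example.com",
  "pages": [
    {
      "id": 1,
      "url": "https://some-blog.com/post",
      "title": "Some Post",
      "timestamp": "2024-01-01T00:00:00Z",
      "linked_to": ["https://example.com"]
    }
  ],
  "count": 1
}
```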

### View Links for a Specific Client
```
http://localhost:8000/clients/clientdomain.com/links
```
See all links pointing to a specific client domain (replace `clientdomain.com` with the client's actual domain).

### API Documentation
```
http://localhost:8000/docs
```
Interactive API documentation (for developers).

## Data Storage

The system automatically creates a file called `link_tracker.db` in the same folder as `main.py`. This file contains all your captured data. **Don't delete this file** unless you want to lose all your data.
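
If you're comfortable with SQL, you can also inspect the data directly with the `sqlite3` command-line tool (if you have it installed); the database has `pages` and `links` tables:

```bash
sqlite3 link_tracker.db "SELECT COUNT(*) FROM pages;"
```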

## Support

If you run into issues:
1. Make sure Python is properly installed
2. Make sure you're in the correct folder when running commands
3. Check that the server is running before trying to access URLs
4. Try restarting the server if something seems stuck


requirements.txt
@@ -0,0 +1,3 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.8.2