commit 3b99e345ed93d54879b8b52b422f95ebcf5dc49d
Author: Bryan Bigari
Date:   Fri Jun 13 11:14:42 2025 -0500

    local db works with limited reporting

diff --git a/link_tracker.db b/link_tracker.db
new file mode 100644
index 0000000..135c265
Binary files /dev/null and b/link_tracker.db differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..53d88d1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,502 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel
+from typing import List, Optional
+from urllib.parse import urlparse
+import sqlite3
+import json
+
+app = FastAPI(title="Link Tracker API", version="1.0.0")
+
+# Enable CORS for your Chrome extension
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, restrict this to specific origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Database setup
+DB_PATH = "link_tracker.db"
+
+def init_db():
+    """Initialize the database with required tables"""
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+
+    # Pages table - stores captured page information
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS pages (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            url TEXT UNIQUE NOT NULL,
+            title TEXT NOT NULL,
+            keywords TEXT,  -- JSON array of keywords
+            timestamp DATETIME NOT NULL,
+            detected_clients TEXT,  -- JSON array of detected clients
+            total_links INTEGER NOT NULL,
+            linked_to TEXT,  -- JSON array of client URLs this page links to
+            colinkiri BOOLEAN DEFAULT FALSE,
+            indexer BOOLEAN DEFAULT FALSE,
+            t2 BOOLEAN DEFAULT FALSE,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+        )
+    """)
+
+    # For existing databases, add the new columns if they don't exist
+    try:
+        cursor.execute("ALTER TABLE pages ADD COLUMN colinkiri BOOLEAN DEFAULT FALSE")
+    except sqlite3.OperationalError:
+        pass  # Column already exists
+
+    try:
+        cursor.execute("ALTER TABLE pages ADD COLUMN indexer BOOLEAN DEFAULT FALSE")
+    except sqlite3.OperationalError:
+        pass  # Column already exists
+
+    try:
+        cursor.execute("ALTER TABLE pages ADD COLUMN t2 BOOLEAN DEFAULT FALSE")
+    except sqlite3.OperationalError:
+        pass  # Column already exists
+
+    try:
+        cursor.execute("ALTER TABLE pages ADD COLUMN linked_to TEXT")
+    except sqlite3.OperationalError:
+        pass  # Column already exists
+
+    # Links table - stores all external links found on pages
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS links (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            page_id INTEGER,
+            href TEXT NOT NULL,
+            anchor_text TEXT,
+            title_attr TEXT,
+            domain TEXT NOT NULL,
+            is_client_link BOOLEAN DEFAULT FALSE,
+            client_domain TEXT,
+            client_name TEXT,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (page_id) REFERENCES pages (id)
+        )
+    """)
+
+    # Create indexes for better query performance
+    cursor.execute("CREATE INDEX IF NOT EXISTS idx_pages_url ON pages (url)")
+    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_domain ON links (domain)")
+    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_client_domain ON links (client_domain)")
+    cursor.execute("CREATE INDEX IF NOT EXISTS idx_links_page_id ON links (page_id)")
+
+    conn.commit()
+    conn.close()
+
+# Pydantic models for API requests
+class LinkData(BaseModel):
+    href: str
+    text: Optional[str] = ""
+    title: Optional[str] = ""
+
+class DetectedClient(BaseModel):
+    domain: str
+    name: str
+
+class PageCaptureRequest(BaseModel):
+    url: str
+    title: str
+    timestamp: str
+    keywords: List[str]
+    detectedClients: List[DetectedClient]
+    totalLinks: int
+    links: List[LinkData]
+
+# API Response models
+class PageSummary(BaseModel):
+    id: int
+    url: str
+    title: str
+    timestamp: str
+    detected_clients: List[str]
+    total_links: int
+    client_links_count: int
+
+class LinkSummary(BaseModel):
+    href: str
+    anchor_text: str
+    domain: str
+    is_client_link: bool
+    client_name: Optional[str] = None
+
+@app.on_event("startup")
+async def startup_event():
+    """Initialize database on startup"""
+    init_db()
+
+@app.get("/")
+async def root():
+    """Health check endpoint"""
+    return {"message": "Link Tracker API is running"}
+
+@app.post("/capture-page")
+async def capture_page(data: PageCaptureRequest):
+    """Capture page data and links from Chrome extension"""
+    try:
+        print(f"Received data: {data}")  # Debug logging
+
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+
+        # Check if page already exists
+        cursor.execute("SELECT id FROM pages WHERE url = ?", (data.url,))
+        existing_page = cursor.fetchone()
+
+        # Get client domains for faster lookup
+        client_domains = {c.domain: c.name for c in data.detectedClients}
+
+        # Collect client URLs for the linked_to field
+        client_urls = []
+
+        if existing_page:
+            # Update existing page
+            page_id = existing_page[0]
+            cursor.execute("""
+                UPDATE pages
+                SET title = ?, keywords = ?, timestamp = ?,
+                    detected_clients = ?, total_links = ?, linked_to = ?
+                WHERE id = ?
+            """, (
+                data.title,
+                json.dumps(data.keywords),
+                data.timestamp,
+                json.dumps([{"domain": c.domain, "name": c.name} for c in data.detectedClients]),
+                data.totalLinks,
+                json.dumps([]),  # Will be populated below
+                page_id
+            ))
+
+            # Delete existing links for this page
+            cursor.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
+        else:
+            # Insert new page
+            cursor.execute("""
+                INSERT INTO pages (url, title, keywords, timestamp, detected_clients, total_links, linked_to)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            """, (
+                data.url,
+                data.title,
+                json.dumps(data.keywords),
+                data.timestamp,
+                json.dumps([{"domain": c.domain, "name": c.name} for c in data.detectedClients]),
+                data.totalLinks,
+                json.dumps([])  # Will be populated below
+            ))
+            page_id = cursor.lastrowid
+
+        # Insert links
+        for link in data.links:
+            try:
+                parsed_url = urlparse(link.href)
+                domain = parsed_url.netloc
+                # Strip only a leading "www." so domains that merely contain
+                # "www." elsewhere are not mangled
+                if domain.startswith('www.'):
+                    domain = domain[4:]
+
+                # Check if this is a client link
+                is_client_link = domain in client_domains
+                client_name = client_domains.get(domain) if is_client_link else None
+
+                # If it's a client link, add to linked_to array
+                if is_client_link:
+                    client_urls.append(link.href)
+
+                cursor.execute("""
+                    INSERT INTO links (page_id, href, anchor_text, title_attr, domain,
+                                       is_client_link, client_domain, client_name)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                """, (
+                    page_id,
+                    link.href,
+                    link.text or "",
+                    link.title or "",
+                    domain,
+                    is_client_link,
+                    domain if is_client_link else None,
+                    client_name
+                ))
+            except Exception as e:
+                print(f"Error processing link {link.href}: {e}")
+                continue
+
+        # Update the linked_to field with collected client URLs
+        cursor.execute("UPDATE pages SET linked_to = ? WHERE id = ?", (json.dumps(client_urls), page_id))
+
+        conn.commit()
+        conn.close()
+
+        return {
+            "success": True,
+            "message": f"Captured {data.totalLinks} links from {data.url}",
+            "page_id": page_id,
+            "detected_clients": len(data.detectedClients)
+        }
+
+    except Exception as e:
+        print(f"Error details: {e}")  # Debug logging
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"Error capturing page data: {str(e)}")
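+
+# For reference, a hypothetical example of the JSON body the extension might
+# POST to /capture-page. The shape follows PageCaptureRequest above; the
+# values are made up for illustration:
+#
+# {
+#   "url": "https://example.com/blog/post",
+#   "title": "Example Post",
+#   "timestamp": "2025-06-13T11:14:42Z",
+#   "keywords": ["seo", "links"],
+#   "detectedClients": [{"domain": "client-site.com", "name": "Client Site"}],
+#   "totalLinks": 2,
+#   "links": [
+#     {"href": "https://client-site.com/page", "text": "anchor text", "title": ""},
+#     {"href": "https://unrelated.com/", "text": "other link", "title": ""}
+#   ]
+# }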
+
+@app.get("/pages", response_model=List[PageSummary])
+async def get_pages(limit: int = 50, offset: int = 0):
+    """Get list of captured pages"""
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+
+        cursor.execute("""
+            SELECT p.id, p.url, p.title, p.timestamp, p.detected_clients, p.total_links,
+                   COUNT(l.id) as client_links_count
+            FROM pages p
+            LEFT JOIN links l ON p.id = l.page_id AND l.is_client_link = 1
+            GROUP BY p.id
+            ORDER BY p.created_at DESC
+            LIMIT ? OFFSET ?
+        """, (limit, offset))
+
+        pages = []
+        for row in cursor.fetchall():
+            detected_clients_data = json.loads(row[4]) if row[4] else []
+            client_names = [c["name"] for c in detected_clients_data]
+
+            pages.append(PageSummary(
+                id=row[0],
+                url=row[1],
+                title=row[2],
+                timestamp=row[3],
+                detected_clients=client_names,
+                total_links=row[5],
+                client_links_count=row[6]
+            ))
+
+        conn.close()
+        return pages
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching pages: {str(e)}")
+
+@app.get("/pages/{page_id}/links", response_model=List[LinkSummary])
+async def get_page_links(page_id: int):
+    """Get all links for a specific page"""
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+
+        cursor.execute("""
+            SELECT href, anchor_text, domain, is_client_link, client_name
+            FROM links
+            WHERE page_id = ?
+            ORDER BY is_client_link DESC, domain ASC
+        """, (page_id,))
+
+        links = []
+        for row in cursor.fetchall():
+            links.append(LinkSummary(
+                href=row[0],
+                anchor_text=row[1],
+                domain=row[2],
+                is_client_link=bool(row[3]),
+                client_name=row[4]
+            ))
+
+        conn.close()
+        return links
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching links: {str(e)}")
+ """, (client_domain, limit)) + + links = [] + for row in cursor.fetchall(): + links.append({ + "target_url": row[0], + "anchor_text": row[1], + "source_page": row[2], + "source_title": row[3], + "client_name": row[4], + "timestamp": row[5] + }) + + conn.close() + return {"client_domain": client_domain, "links": links} + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error fetching client links: {str(e)}") + +@app.get("/urls/for-colinkiri", response_class=PlainTextResponse) +async def get_urls_for_colinkiri(): + """Get all URLs where colinkiri=false and mark them as processed""" + try: + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Get unprocessed URLs + cursor.execute(""" + SELECT id, url + FROM pages + WHERE colinkiri = FALSE + ORDER BY created_at ASC + """) + + page_ids = [] + url_list = [] + for row in cursor.fetchall(): + page_ids.append(row[0]) + url_list.append(row[1]) + + # Mark them as processed + if page_ids: + placeholders = ','.join(['?'] * len(page_ids)) + cursor.execute(f"UPDATE pages SET colinkiri = TRUE WHERE id IN ({placeholders})", page_ids) + conn.commit() + + conn.close() + + # Return URLs as plain text, one per line + return '\n'.join(url_list) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error getting URLs for colinkiri: {str(e)}") + +@app.get("/urls/for-indexer", response_class=PlainTextResponse) +async def get_urls_for_indexer(): + """Get all URLs where indexer=false and mark them as processed""" + try: + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Get unprocessed URLs + cursor.execute(""" + SELECT id, url + FROM pages + WHERE indexer = FALSE + ORDER BY created_at ASC + """) + + page_ids = [] + url_list = [] + for row in cursor.fetchall(): + page_ids.append(row[0]) + url_list.append(row[1]) + + # Mark them as processed + if page_ids: + placeholders = ','.join(['?'] * len(page_ids)) + cursor.execute(f"UPDATE pages SET indexer = TRUE WHERE id IN ({placeholders})", page_ids) + conn.commit() + + conn.close() + + # Return URLs as plain text, one per line + return '\n'.join(url_list) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error getting URLs for indexer: {str(e)}") + +@app.get("/search/linking-to") +async def search_pages_linking_to(target_url: str): + """Find all pages that link to a specific URL""" + try: + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + cursor.execute(""" + SELECT id, url, title, timestamp, linked_to + FROM pages + WHERE linked_to LIKE ? 
+
+@app.get("/search/linking-to")
+async def search_pages_linking_to(target_url: str):
+    """Find all pages that link to a specific URL"""
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+
+        cursor.execute("""
+            SELECT id, url, title, timestamp, linked_to
+            FROM pages
+            WHERE linked_to LIKE ?
+            ORDER BY created_at DESC
+        """, (f'%{target_url}%',))
+
+        pages = []
+        for row in cursor.fetchall():
+            linked_to = json.loads(row[4]) if row[4] else []
+            # Verify the exact URL is in the linked_to array
+            if target_url in linked_to:
+                pages.append({
+                    "id": row[0],
+                    "url": row[1],
+                    "title": row[2],
+                    "timestamp": row[3],
+                    "linked_to": linked_to
+                })
+
+        conn.close()
+
+        return {
+            "target_url": target_url,
+            "pages": pages,
+            "count": len(pages)
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error searching for pages linking to URL: {str(e)}")
+
+@app.get("/stats")
+async def get_stats():
+    """Get overall statistics"""
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+
+        # Total pages captured
+        cursor.execute("SELECT COUNT(*) FROM pages")
+        total_pages = cursor.fetchone()[0]
+
+        # Total links captured
+        cursor.execute("SELECT COUNT(*) FROM links")
+        total_links = cursor.fetchone()[0]
+
+        # Total client links
+        cursor.execute("SELECT COUNT(*) FROM links WHERE is_client_link = 1")
+        client_links = cursor.fetchone()[0]
+
+        # Links by client
+        cursor.execute("""
+            SELECT client_name, COUNT(*) as link_count
+            FROM links
+            WHERE is_client_link = 1
+            GROUP BY client_name
+            ORDER BY link_count DESC
+        """)
+        client_stats = [{"client": row[0], "links": row[1]} for row in cursor.fetchall()]
+
+        conn.close()
+
+        return {
+            "total_pages": total_pages,
+            "total_links": total_links,
+            "client_links": client_links,
+            "other_links": total_links - client_links,
+            "client_breakdown": client_stats
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching stats: {str(e)}")
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
\ No newline at end of file
diff --git a/readme_md.md b/readme_md.md
new file mode 100644
index 0000000..720d60b
--- /dev/null
+++ b/readme_md.md
@@ -0,0 +1,196 @@
+# Link Tracker API
+
+A simple web service that tracks links to your client websites and provides reports for SEO indexing services.
+
+## What This Does
+
+This tool helps you:
+- Track which pages link to your client websites
+- Generate lists of URLs to submit to indexing services like colinkiri
+- Keep track of which URLs you've already submitted
+
+## Installation
+
+### Step 1: Install Python
+
+**On Mac:**
+1. Open Terminal (press `Cmd + Space`, type "Terminal", press Enter)
+2. Install Homebrew if you don't have it:
+   ```bash
+   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+   ```
+3. Install Python:
+   ```bash
+   brew install python
+   ```
+
+**On Windows:**
+1. Go to https://python.org/downloads/
+2. Download the latest Python version
+3. Run the installer
+4. **Important:** Check "Add Python to PATH" during installation
+5. Open Command Prompt (press `Win + R`, type "cmd", press Enter)
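+
+You can confirm the install worked by checking the version (any recent Python 3 release should be fine):
+
+```bash
+# Mac
+python3 --version
+
+# Windows
+python --version
+```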
+
+### Step 2: Download the Project
+
+1. Open Terminal (Mac) or Command Prompt (Windows)
+2. Navigate to where you want to install the project:
+   ```bash
+   cd Desktop
+   ```
+3. Clone the project repository:
+   ```bash
+   git clone https://git.peninsulaindustries.com/bryanb/Link-Tracker-Server.git
+   cd Link-Tracker-Server
+   ```
+   (Cloning creates a folder named `Link-Tracker-Server` — the same name as the repository.)
+
+### Step 3: Install Required Packages
+
+**On Mac (Terminal):**
+```bash
+cd ~/Desktop/Link-Tracker-Server
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+**On Windows (Command Prompt):**
+```bash
+cd %USERPROFILE%\Desktop\Link-Tracker-Server
+python -m venv venv
+venv\Scripts\activate
+pip install -r requirements.txt
+```
+
+## Running the Server
+
+**On Mac:**
+```bash
+cd ~/Desktop/Link-Tracker-Server
+source venv/bin/activate
+python main.py
+```
+
+**On Windows:**
+```bash
+cd %USERPROFILE%\Desktop\Link-Tracker-Server
+venv\Scripts\activate
+python main.py
+```
+
+You should see something like:
+```
+INFO:     Started server process [12345]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8000
+```
+
+**The server is now running!** Keep this window open while you use the system.
+
+## Getting Your URL Reports
+
+### For Colinkiri Indexing Service
+
+Open your web browser and go to:
+```
+http://localhost:8000/urls/for-colinkiri
+```
+
+This will show you a list of URLs, one per line, that you can copy and paste into colinkiri. **Important:** After you visit this URL, those pages are marked as "submitted" so they won't appear in future reports.
+
+### For Other Indexing Services
+
+Open your web browser and go to:
+```
+http://localhost:8000/urls/for-indexer
+```
+
+Same as above, but for a different indexing service.
+
+## Updating to the Latest Version
+
+When updates are available, you can update the project like this:
+
+1. Stop the server (press `Ctrl + C` in the terminal)
+2. Update the code:
+   ```bash
+   git pull origin main
+   ```
+3. Update any new dependencies (make sure the venv is activated first):
+   ```bash
+   pip install -r requirements.txt
+   ```
+4. Restart the server:
+   ```bash
+   python main.py
+   ```
+
+**Note:** Your data (the `link_tracker.db` file) will not be affected by updates.
+
+## Stopping the Server
+
+To stop the server, go back to your Terminal/Command Prompt window and press `Ctrl + C`.
+
+## Troubleshooting
+
+**"Command not found" errors:**
+- Make sure Python is installed and added to your PATH
+- Try using `python3` instead of `python` on Mac
+- Try using `py` instead of `python` on Windows
+
+**"Port already in use" error:**
+- Another program is using port 8000
+- Try changing the port in `main.py` (last line): `uvicorn.run(app, host="0.0.0.0", port=8001)`
+- Then use `http://localhost:8001` instead of `http://localhost:8000`
+
+**Can't access the URLs:**
+- Make sure the server is running (you should see the "Uvicorn running" message)
+- Check that you're using the correct URL: `http://localhost:8000`
+- Try refreshing your browser
+
+## Other Available Endpoints
+
+These are for advanced users or developers:
+
+### View All Captured Pages
+```
+http://localhost:8000/pages
+```
+Shows all pages that have been captured with link information.
+
+### View Statistics
+```
+http://localhost:8000/stats
+```
+Shows overall statistics about captured pages and links.
+
+### Search for Pages Linking to a Specific URL
+```
+http://localhost:8000/search/linking-to?target_url=https://example.com
+```
+Finds all pages that link to a specific URL.
+
+### View Links for a Specific Client
+```
+http://localhost:8000/clients/clientdomain.com/links
+```
+Shows all links pointing to a specific client domain.
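+
+### Fetching Reports from a Script
+
+If you'd rather pull the URL reports from a script than a browser, here is a quick sketch using `curl` (assuming the server is running on the default port 8000):
+
+```bash
+# Save the next batch of colinkiri URLs to a file.
+# Note: this marks those pages as submitted, the same as visiting in a browser.
+curl http://localhost:8000/urls/for-colinkiri -o colinkiri_urls.txt
+
+# Same idea for the other indexer list
+curl http://localhost:8000/urls/for-indexer -o indexer_urls.txt
+```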
+
+### API Documentation
+```
+http://localhost:8000/docs
+```
+Interactive API documentation (for developers).
+
+## Data Storage
+
+The system automatically creates a file called `link_tracker.db` in the same folder as `main.py`. This file contains all your captured data. **Don't delete this file** unless you want to lose all your data.
+
+## Support
+
+If you run into issues:
+1. Make sure Python is properly installed
+2. Make sure you're in the correct folder when running commands
+3. Check that the server is running before trying to access URLs
+4. Try restarting the server if something seems stuck
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1d4616d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.8.2