Preface
In the era of digital marketing, Search Engine Optimization (SEO) is a crucial channel for websites to acquire traffic. Traditional SEO work, however, involves a great deal of repetitive manual effort, which is slow and error-prone. This article walks you through building a comprehensive SEO automation toolset in Python that improves efficiency and supports data-driven optimization strategies.
Project Overview
Core Functional Modules
Our SEO automation tool will include the following core functions:
- Keyword Research & Analysis
  - Keyword Discovery
  - Competition Analysis
  - Search Volume Statistics
- Website Technical SEO Audit
  - Page Load Speed Analysis
  - Meta Tag Inspection
  - Internal Link Structure Analysis
  - Mobile-Friendliness Detection
- Content Optimization Suggestions
  - Keyword Density Analysis
  - Content Quality Assessment
  - Title Optimization Suggestions
- Competitor Analysis
  - Rank Tracking
  - Backlink Analysis
  - Content Strategy Research
- Automated Link Building
  - Link Opportunity Discovery
  - Automated Outreach
  - Link Quality Assessment
  - Link Monitoring & Management
- Automated Report Generation
  - Data Visualization
  - Scheduled Report Delivery
  - Trend Analysis
Technology Stack Selection
Core Dependencies
```python
# Web Requests & Data Scraping
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Data Processing & Analysis
import pandas as pd
import numpy as np
from textstat import flesch_reading_ease

# SEO-Specific Libraries
import advertools as adv
from googlesearch import search

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Automation & Scheduling
import schedule
import time
from datetime import datetime

# Link Building
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import json
import random

# Configuration Management
import configparser
import os
from dotenv import load_dotenv
```
Core Module Implementation
1. Keyword Research Module
```python
class KeywordResearcher:
    def __init__(self, api_key=None, cx=None):
        self.api_key = api_key  # Google Custom Search API key
        self.cx = cx  # Custom Search Engine ID (required alongside the key)

    def extract_keywords_from_content(self, content, language='zh'):
        """Extract keywords from content"""
        # Use jieba for Chinese word segmentation
        import jieba.analyse

        keywords = jieba.analyse.extract_tags(
            content,
            topK=20,
            withWeight=True
        )
        return keywords

    def get_search_suggestions(self, seed_keyword):
        """Get related results via Google Custom Search"""
        # advertools' serp_goog takes the CSE ID (cx) and API key separately
        suggestions = adv.serp_goog(
            q=seed_keyword,
            cx=self.cx,
            key=self.api_key,
            num=10
        )
        return suggestions

    def analyze_keyword_difficulty(self, keyword):
        """Analyze keyword competition difficulty"""
        # Simplified competition analysis based on the top search results
        search_results = list(search(keyword, num=10, stop=10))
        difficulty_score = {
            'keyword': keyword,
            'competition_level': len(search_results),
            'estimated_difficulty': 'Medium'  # Could be derived from a richer model
        }
        return difficulty_score
```
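A quick usage sketch for the keyword extractor — the sample sentence is illustrative, and jieba must be installed:

```python
# Illustrative only: extract weighted keywords from a short Chinese passage
researcher = KeywordResearcher()
sample = "搜索引擎优化可以帮助网站从自然搜索中获得更多流量"
for word, weight in researcher.extract_keywords_from_content(sample):
    print(f"{word}\t{weight:.3f}")
```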
2. Technical SEO Audit Module
```python
class TechnicalSEOAnalyzer:
    def __init__(self):
        self.session = requests.Session()

    def check_page_speed(self, url):
        """Check page load speed"""
        start_time = time.time()
        try:
            response = self.session.get(url, timeout=10)
            load_time = time.time() - start_time
            return {
                'url': url,
                'load_time': round(load_time, 2),
                'status_code': response.status_code,
                'content_size': len(response.content)
            }
        except Exception as e:
            return {'url': url, 'error': str(e)}

    def analyze_meta_tags(self, url):
        """Analyze meta tags"""
        try:
            response = self.session.get(url)
            soup = BeautifulSoup(response.content, 'html.parser')
            meta_analysis = {
                'title': soup.find('title').text if soup.find('title') else None,
                'meta_description': None,
                'meta_keywords': None,
                'h1_tags': [h1.text for h1 in soup.find_all('h1')],
                'h2_tags': [h2.text for h2 in soup.find_all('h2')],
                'image_alt_missing': len([img for img in soup.find_all('img') if not img.get('alt')])
            }
            # Get the meta description
            meta_desc = soup.find('meta', attrs={'name': 'description'})
            if meta_desc:
                meta_analysis['meta_description'] = meta_desc.get('content')
            return meta_analysis
        except Exception as e:
            return {'url': url, 'error': str(e)}

    def check_internal_links(self, url, domain):
        """Check internal link structure"""
        try:
            response = self.session.get(url)
            soup = BeautifulSoup(response.content, 'html.parser')
            all_links = soup.find_all('a', href=True)
            internal_links = [
                link['href'] for link in all_links
                if domain in link['href'] or link['href'].startswith('/')
            ]
            return {
                'total_links': len(all_links),
                'internal_links': len(internal_links),
                'external_links': len(all_links) - len(internal_links),
                'internal_link_ratio': len(internal_links) / len(all_links) if all_links else 0
            }
        except Exception as e:
            return {'url': url, 'error': str(e)}
```
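A minimal audit sketch; `https://example.com` is a placeholder target:

```python
# Run the three checks against a placeholder URL and print the raw result dicts
analyzer = TechnicalSEOAnalyzer()
print(analyzer.check_page_speed("https://example.com"))
print(analyzer.analyze_meta_tags("https://example.com"))
print(analyzer.check_internal_links("https://example.com", domain="example.com"))
```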
3. Content Optimization Analysis Module
```python
class ContentOptimizer:
    def __init__(self):
        pass

    def analyze_keyword_density(self, content, target_keywords):
        """Analyze keyword density"""
        import re
        # Strip HTML tags and lowercase the text
        clean_content = re.sub(r'<[^>]+>', '', content.lower())
        word_count = len(clean_content.split())
        keyword_analysis = {}
        for keyword in target_keywords:
            keyword_count = clean_content.count(keyword.lower())
            density = (keyword_count / word_count) * 100 if word_count > 0 else 0
            keyword_analysis[keyword] = {
                'count': keyword_count,
                'density': round(density, 2),
                'recommendation': self._get_density_recommendation(density)
            }
        return keyword_analysis

    def _get_density_recommendation(self, density):
        """Get a keyword density recommendation"""
        if density < 1:
            return "Density too low; consider using the keyword more often"
        elif density > 3:
            return "Density too high; may be flagged as keyword stuffing"
        else:
            return "Density is appropriate"

    def analyze_content_quality(self, content):
        """Analyze content quality"""
        word_count = len(content.split())
        # Use textstat to score readability
        readability_score = flesch_reading_ease(content)
        quality_metrics = {
            'word_count': word_count,
            'readability_score': readability_score,
            'readability_level': self._get_readability_level(readability_score),
            'recommendations': self._get_content_recommendations(word_count, readability_score)
        }
        return quality_metrics

    def _get_readability_level(self, score):
        """Map a Flesch score to a readability level"""
        if score >= 90:
            return "Very easy to read"
        elif score >= 80:
            return "Easy to read"
        elif score >= 70:
            return "Fairly easy to read"
        elif score >= 60:
            return "Standard reading difficulty"
        else:
            return "Fairly difficult to read"

    def _get_content_recommendations(self, word_count, readability_score):
        """Get content optimization recommendations"""
        recommendations = []
        if word_count < 300:
            recommendations.append("Content is short; consider expanding it to at least 300 words")
        elif word_count > 2000:
            recommendations.append("Content is long; consider splitting it into sections or pages")
        if readability_score < 60:
            recommendations.append("Readability is low; consider simpler sentence structures")
        return recommendations
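```

A short sketch of the optimizer on an illustrative English snippet (Flesch scoring assumes English text):

```python
# Illustrative input; real usage would pass the fetched page HTML
optimizer = ContentOptimizer()
text = ("Search engine optimization helps websites earn organic traffic. "
        "Good SEO optimization starts with content that answers real questions.")
print(optimizer.analyze_keyword_density(text, ["SEO optimization"]))
print(optimizer.analyze_content_quality(text))
```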
4. Automated Link Building Module
```python
class BacklinkBuilder:
    def __init__(self, email_config=None):
        self.email_config = email_config or {}
        self.prospects_db = []

    def find_link_opportunities(self, target_keywords, competitor_urls=None):
        """Discover link opportunities"""
        opportunities = []
        # 1. Search for relevant websites based on keywords
        for keyword in target_keywords:
            search_queries = [
                f"{keyword} resource page",
                f"{keyword} links",
                f"{keyword} directory",
                f"best {keyword} websites",
                f"{keyword} tool recommendations"
            ]
            for query in search_queries:
                try:
                    search_results = list(search(query, num=10, stop=10))
                    for url in search_results:
                        opportunity = self._analyze_link_opportunity(url, keyword)
                        if opportunity['score'] > 50:  # Keep only high-quality opportunities
                            opportunities.append(opportunity)
                except Exception as e:
                    print(f"Search error: {e}")
        # 2. Analyze competitor backlinks
        if competitor_urls:
            for competitor_url in competitor_urls:
                competitor_backlinks = self._get_competitor_backlinks(competitor_url)
                opportunities.extend(competitor_backlinks)
        return self._deduplicate_opportunities(opportunities)
    def _analyze_link_opportunity(self, url, keyword):
        """Analyze a single link opportunity"""
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')
            # Basic information extraction
            title = soup.find('title').text if soup.find('title') else ""
            meta_desc = soup.find('meta', attrs={'name': 'description'})
            meta_desc = meta_desc.get('content') if meta_desc else ""
            # Calculate a relevance score
            relevance_score = self._calculate_relevance_score(
                title + " " + meta_desc, keyword
            )
            # Check for contact information
            contact_info = self._extract_contact_info(soup)
            # Estimate page authority
            authority_score = self._estimate_authority(soup, url)
            opportunity = {
                'url': url,
                'title': title,
                'keyword': keyword,
                'relevance_score': relevance_score,
                'authority_score': authority_score,
                'contact_info': contact_info,
                'score': (relevance_score + authority_score) / 2,
                'status': 'discovered',
                'discovered_date': datetime.now().isoformat()
            }
            return opportunity
        except Exception as e:
            return {
                'url': url,
                'keyword': keyword,
                'error': str(e),
                'score': 0,
                'status': 'error'
            }
    def _calculate_relevance_score(self, content, keyword):
        """Calculate a content relevance score"""
        content_lower = content.lower()
        keyword_lower = keyword.lower()
        # Simple relevance heuristic
        keyword_count = content_lower.count(keyword_lower)
        content_length = len(content.split())
        if content_length == 0:
            return 0
        # Score from keyword frequency plus a density bonus
        density = (keyword_count / content_length) * 100
        base_score = min(keyword_count * 10, 50)  # Up to 50 points
        density_bonus = min(density * 5, 30)  # Up to 30 points
        return min(base_score + density_bonus, 100)
    def _extract_contact_info(self, soup):
        """Extract contact information"""
        contact_info = {
            'email': None,
            'contact_page': None,
            'social_media': []
        }
        # Find an email address
        import re
        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        page_text = soup.get_text()
        emails = re.findall(email_pattern, page_text)
        if emails:
            contact_info['email'] = emails[0]
        # Find a contact page link ('联系'/'关于' match "contact"/"about" on Chinese-language pages)
        contact_links = soup.find_all('a', href=True)
        for link in contact_links:
            href = link['href'].lower()
            text = link.get_text().lower()
            if any(word in href or word in text for word in ['contact', '联系', 'about', '关于']):
                contact_info['contact_page'] = link['href']
                break
        # Find social media links
        social_patterns = {
            'twitter': r'twitter\.com',
            'facebook': r'facebook\.com',
            'linkedin': r'linkedin\.com',
            'weibo': r'weibo\.com'
        }
        for link in contact_links:
            href = link.get('href', '')
            for platform, pattern in social_patterns.items():
                if re.search(pattern, href):
                    contact_info['social_media'].append({
                        'platform': platform,
                        'url': href
                    })
        return contact_info
    def _estimate_authority(self, soup, url):
        """Estimate website authority (heuristic)"""
        authority_score = 0
        # Naive netloc extraction; assumes an absolute http(s) URL
        domain = url.split('/')[2]
        if len(domain.split('.')) >= 2:
            authority_score += 20
        # Content quality signals
        text_content = soup.get_text()
        word_count = len(text_content.split())
        if word_count > 500:
            authority_score += 20
        if word_count > 1000:
            authority_score += 10
        # Page structure signals
        if soup.find_all('h1'):
            authority_score += 10
        if soup.find_all('h2'):
            authority_score += 10
        if soup.find_all('img'):
            authority_score += 10
        # Outbound links on the page
        external_links = len([
            link for link in soup.find_all('a', href=True)
            if 'http' in link['href'] and domain not in link['href']
        ])
        if external_links > 5:
            authority_score += 10
        if external_links > 20:
            authority_score += 10
        return min(authority_score, 100)
    def _get_competitor_backlinks(self, competitor_url):
        """Get competitor backlinks (simplified version)"""
        # A professional backlink API (e.g., Ahrefs or SEMrush) belongs here;
        # this is a mock implementation.
        mock_backlinks = [
            {
                'url': 'https://example-blog.com',
                'title': 'Relevant Industry Blog',
                'authority_score': 75,
                'relevance_score': 80,
                'score': 77.5,
                'source': f'competitor_analysis_{competitor_url}',
                'status': 'discovered',
                'discovered_date': datetime.now().isoformat()
            }
        ]
        return mock_backlinks
    def _deduplicate_opportunities(self, opportunities):
        """Deduplicate link opportunities by URL"""
        seen_urls = set()
        unique_opportunities = []
        for opp in opportunities:
            if opp.get('url') not in seen_urls:
                seen_urls.add(opp.get('url'))
                unique_opportunities.append(opp)
        # Sort by score, best first
        return sorted(unique_opportunities, key=lambda x: x.get('score', 0), reverse=True)
    def generate_outreach_email(self, opportunity, your_website, your_content_url):
        """Generate an outreach email from one of several templates"""
        templates = [
            {
                'subject': f"Resource Recommendation for {opportunity['title']}",
                'body': f"""
Hello,

I am a content editor at {your_website}. I just read your article "{opportunity['title']}" and found it very valuable.

We recently published an in-depth article about {opportunity['keyword']}: {your_content_url}

It offers unique insights and practical advice, and I believe it would bring additional value to your readers. If you find it suitable, would you consider adding this link to your article?

Thank you for your time and consideration.

Best regards,
[Your Name]
"""
            },
            {
                'subject': f"Quality Content Recommendation for Your {opportunity['keyword']} Resource Page",
                'body': f"""
Hello,

I discovered your website {opportunity['url']} while searching for {opportunity['keyword']} resources. The resource list you've compiled is very comprehensive!

I'd like to recommend an article we recently published: {your_content_url}

It takes a deep look at the latest trends and best practices for {opportunity['keyword']}, including original research and case studies. I believe it would be a valuable addition to your resource list.

If you have any questions or need more information, please feel free to contact me.

Thank you!
[Your Name]
"""
            }
        ]
        template = random.choice(templates)
        return {
            'to_email': opportunity['contact_info'].get('email'),
            'subject': template['subject'],
            'body': template['body'],
            'opportunity_id': opportunity.get('url'),
            'created_date': datetime.now().isoformat()
        }
    def send_outreach_email(self, email_data):
        """Send an outreach email"""
        if not self.email_config or not email_data.get('to_email'):
            return {'status': 'error', 'message': 'Missing email configuration or recipient email'}
        try:
            msg = MIMEMultipart()
            msg['From'] = self.email_config['from_email']
            msg['To'] = email_data['to_email']
            msg['Subject'] = email_data['subject']
            msg.attach(MIMEText(email_data['body'], 'plain', 'utf-8'))

            server = smtplib.SMTP(self.email_config['smtp_server'], self.email_config['smtp_port'])
            server.starttls()
            server.login(self.email_config['username'], self.email_config['password'])
            server.sendmail(self.email_config['from_email'], email_data['to_email'], msg.as_string())
            server.quit()

            return {
                'status': 'sent',
                'message': 'Email sent successfully',
                'sent_date': datetime.now().isoformat()
            }
        except Exception as e:
            return {
                'status': 'error',
                'message': f'Failed to send email: {str(e)}'
            }
    def track_backlink_status(self, target_url, backlink_urls):
        """Monitor backlink status"""
        backlink_status = []
        for backlink_url in backlink_urls:
            try:
                response = requests.get(backlink_url, timeout=10)
                soup = BeautifulSoup(response.content, 'html.parser')
                # Check whether the page still links to the target
                links = soup.find_all('a', href=True)
                has_backlink = any(target_url in link['href'] for link in links)
                backlink_status.append({
                    'backlink_url': backlink_url,
                    'target_url': target_url,
                    'has_backlink': has_backlink,
                    'checked_date': datetime.now().isoformat(),
                    'status_code': response.status_code
                })
            except Exception as e:
                backlink_status.append({
                    'backlink_url': backlink_url,
                    'target_url': target_url,
                    'error': str(e),
                    'checked_date': datetime.now().isoformat()
                })
        return backlink_status
    def save_prospects_to_file(self, opportunities, filename='backlink_prospects.json'):
        """Save link opportunities to a file"""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(opportunities, f, ensure_ascii=False, indent=2)
        return filename

    def load_prospects_from_file(self, filename='backlink_prospects.json'):
        """Load link opportunities from a file"""
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return []
```
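A brief usage sketch for the monitoring side — both URLs are placeholders:

```python
# Check whether a page we earned a link from still points at our site
builder = BacklinkBuilder()
statuses = builder.track_backlink_status(
    target_url="https://example.com",
    backlink_urls=["https://example-blog.com/resources"]
)
for status in statuses:
    print(status.get('backlink_url'), '->', status.get('has_backlink', status.get('error')))
```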
5. Automated Report Generation Module
```python
class SEOReportGenerator:
    def __init__(self, output_dir='reports'):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def generate_comprehensive_report(self, analysis_data):
        """Generate a comprehensive SEO report"""
        report_date = datetime.now().strftime('%Y-%m-%d')
        # Create the HTML report
        html_content = self._create_html_report(analysis_data, report_date)
        # Save the report
        report_path = os.path.join(self.output_dir, f'seo_report_{report_date}.html')
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        return report_path

    def _create_html_report(self, data, date):
        """Create an HTML-format report"""
        html_template = f"""
<!DOCTYPE html>
<html>
<head>
    <title>SEO Analysis Report - {date}</title>
    <meta charset="utf-8">
    <style>
        body {{ font-family: Arial, sans-serif; margin: 40px; }}
        .header {{ background-color: #f4f4f4; padding: 20px; }}
        .section {{ margin: 20px 0; }}
        .metric {{ background-color: #e9e9e9; padding: 10px; margin: 5px 0; }}
        .recommendation {{ background-color: #fff3cd; padding: 10px; margin: 5px 0; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>SEO Automated Analysis Report</h1>
        <p>Generated Date: {date}</p>
    </div>
    <div class="section">
        <h2>Technical SEO Audit Results</h2>
        {self._format_technical_seo_data(data.get('technical_seo', {}))}
    </div>
    <div class="section">
        <h2>Content Optimization Suggestions</h2>
        {self._format_content_optimization_data(data.get('content_optimization', {}))}
    </div>
    <div class="section">
        <h2>Keyword Analysis</h2>
        {self._format_keyword_data(data.get('keyword_analysis', {}))}
    </div>
</body>
</html>
"""
        return html_template

    def _format_technical_seo_data(self, data):
        """Format technical SEO data"""
        if not data:
            return "<p>No technical SEO data available</p>"
        html = ""
        for url, metrics in data.items():
            html += f"""
            <div class="metric">
                <h3>{url}</h3>
                <p>Load Time: {metrics.get('load_time', 'N/A')} seconds</p>
                <p>Status Code: {metrics.get('status_code', 'N/A')}</p>
                <p>Content Size: {metrics.get('content_size', 'N/A')} bytes</p>
            </div>
            """
        return html

    def _format_content_optimization_data(self, data):
        """Format content optimization data"""
        if not data:
            return "<p>No content optimization data available</p>"
        html = ""
        for page, analysis in data.items():
            html += f"""
            <div class="metric">
                <h3>{page}</h3>
                <p>Word Count: {analysis.get('word_count', 'N/A')}</p>
                <p>Readability Score: {analysis.get('readability_score', 'N/A')}</p>
                <p>Readability Level: {analysis.get('readability_level', 'N/A')}</p>
            </div>
            """
            recommendations = analysis.get('recommendations', [])
            if recommendations:
                html += '<div class="recommendation"><h4>Optimization Suggestions:</h4><ul>'
                for rec in recommendations:
                    html += f'<li>{rec}</li>'
                html += '</ul></div>'
        return html

    def _format_keyword_data(self, data):
        """Format keyword data"""
        if not data:
            return "<p>No keyword data available</p>"
        html = ""
        for keyword, metrics in data.items():
            html += f"""
            <div class="metric">
                <h3>{keyword}</h3>
                <p>Count: {metrics.get('count', 'N/A')}</p>
                <p>Density: {metrics.get('density', 'N/A')}%</p>
                <p>Suggestion: {metrics.get('recommendation', 'N/A')}</p>
            </div>
            """
        return html
```
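A minimal sketch that renders a report from mock data; all values are illustrative:

```python
# Feed hand-written mock metrics through the report pipeline
generator = SEOReportGenerator()
mock_data = {
    'technical_seo': {
        'https://example.com': {'load_time': 1.42, 'status_code': 200, 'content_size': 52314}
    },
    'keyword_analysis': {
        'SEO optimization': {'count': 12, 'density': 1.8, 'recommendation': 'Density is appropriate'}
    }
}
print(generator.generate_comprehensive_report(mock_data))  # prints the saved report path
```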
Usage Example
Complete SEO Analysis Workflow
```python
def main():
    # Initialize modules
    keyword_researcher = KeywordResearcher()
    technical_analyzer = TechnicalSEOAnalyzer()
    content_optimizer = ContentOptimizer()

    # Email configuration (for link building)
    email_config = {
        'from_email': 'your-email@example.com',
        'smtp_server': 'smtp.gmail.com',
        'smtp_port': 587,
        'username': 'your-email@example.com',
        'password': 'your-app-password'
    }
    backlink_builder = BacklinkBuilder(email_config)
    report_generator = SEOReportGenerator()

    # Target website and keywords
    target_url = "https://example.com"
    target_keywords = ["SEO optimization", "search engine optimization", "website optimization"]

    # Execute the analysis
    analysis_results = {}

    # 1. Technical SEO audit
    print("Performing technical SEO audit...")
    technical_results = technical_analyzer.check_page_speed(target_url)
    meta_results = technical_analyzer.analyze_meta_tags(target_url)
    analysis_results['technical_seo'] = {
        target_url: {**technical_results, **meta_results}
    }

    # 2. Content optimization analysis
    print("Performing content optimization analysis...")
    # Fetch the page content
    response = requests.get(target_url)
    content = response.text
    keyword_density = content_optimizer.analyze_keyword_density(content, target_keywords)
    content_quality = content_optimizer.analyze_content_quality(content)
    analysis_results['content_optimization'] = {
        target_url: {**content_quality}
    }
    analysis_results['keyword_analysis'] = keyword_density

    # 3. Link building analysis
    print("Discovering link opportunities...")
    competitor_urls = ["https://competitor1.com", "https://competitor2.com"]
    link_opportunities = backlink_builder.find_link_opportunities(
        target_keywords,
        competitor_urls
    )

    # Save the link opportunities
    prospects_file = backlink_builder.save_prospects_to_file(link_opportunities)
    print(f"Found {len(link_opportunities)} link opportunities, saved to {prospects_file}")

    # Generate an outreach email (example)
    if link_opportunities:
        sample_opportunity = link_opportunities[0]
        if sample_opportunity.get('contact_info', {}).get('email'):
            email_content = backlink_builder.generate_outreach_email(
                sample_opportunity,
                target_url,
                f"{target_url}/your-content-page"
            )
            print("Sample outreach email generated")

    analysis_results['backlink_opportunities'] = {
        'total_found': len(link_opportunities),
        'high_quality': len([opp for opp in link_opportunities if opp.get('score', 0) > 75]),
        'with_contact_info': len([opp for opp in link_opportunities if opp.get('contact_info', {}).get('email')])
    }

    # 4. Generate the report
    print("Generating report...")
    report_path = report_generator.generate_comprehensive_report(analysis_results)
    print(f"Report generated: {report_path}")

if __name__ == "__main__":
    main()
```
Automated Scheduling
Schedule Regular SEO Audits
```python
def schedule_seo_analysis():
    """Set up scheduled SEO analysis tasks"""
    # Run every day at 9 AM
    schedule.every().day.at("09:00").do(main)
    # Run a comprehensive analysis every Monday
    schedule.every().monday.at("10:00").do(comprehensive_analysis)

    print("SEO automation tasks started...")
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute

def comprehensive_analysis():
    """Execute a comprehensive SEO analysis"""
    # Logic for more in-depth analysis goes here
    pass
```
Project Deployment & Extension
Configuration Management
Create a config.ini file:

```ini
[DEFAULT]
target_urls = https://example1.com,https://example2.com
target_keywords = SEO optimization,search engine optimization,website optimization

[API_KEYS]
google_api_key = your_google_api_key
google_cx = your_custom_search_engine_id

[SETTINGS]
report_output_dir = reports
analysis_frequency = daily
email_notifications = true
```
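The dependency list imports configparser and python-dotenv, but the examples never wire them up. Here is a minimal loader sketch, assuming the config.ini above and a hypothetical SMTP_PASSWORD entry in a local .env file:

```python
import configparser
import os
from dotenv import load_dotenv

def load_settings(path='config.ini'):
    load_dotenv()  # read secrets from .env; SMTP_PASSWORD is a hypothetical key
    config = configparser.ConfigParser()
    config.read(path)
    return {
        'target_urls': config['DEFAULT']['target_urls'].split(','),
        'target_keywords': config['DEFAULT']['target_keywords'].split(','),
        'google_api_key': config['API_KEYS']['google_api_key'],
        'google_cx': config['API_KEYS']['google_cx'],
        'report_dir': config['SETTINGS']['report_output_dir'],
        'smtp_password': os.getenv('SMTP_PASSWORD'),
    }
```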
Docker Deployment
```dockerfile
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

CMD ["python", "main.py"]
```
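The Dockerfile copies a requirements.txt that the article never lists. A plausible version inferred from the imports above — the package names are best guesses; in particular, `from googlesearch import search` matches the `google` package on PyPI (the newer `googlesearch-python` has a slightly different signature):

```text
requests
beautifulsoup4
selenium
pandas
numpy
textstat
advertools
google
jieba
matplotlib
seaborn
plotly
schedule
python-dotenv
```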
Summary
Following this guide, we have built a fully functional SEO automation tool with the following strengths:
- Comprehensiveness: Covers multiple dimensions, including technical SEO, content optimization, and keyword analysis.
- Automation: Supports scheduled execution and automated report generation.
- Extensibility: Modular design facilitates adding new features.
- Practicality: Provides specific optimization suggestions and data support.
Future Optimization Directions
- Integrate More Data Sources: Such as Google Search Console API, Baidu Webmaster Tools API.
- Enhance AI Capabilities: Use machine learning algorithms for more intelligent analysis.
- Visualization Upgrade: Develop a web interface for more intuitive data presentation.
- Mobile Support: Add mobile SEO detection features.
- Competitor Monitoring: Implement automated competitor analysis.
With continued iteration and optimization, this SEO automation tool can become a powerful assistant in your digital marketing work, letting you accomplish far more SEO work with far less manual effort.