try
This commit is contained in:
+167
-121
@@ -30,6 +30,8 @@ from pathlib import Path
|
|||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
import hashlib
|
import hashlib
|
||||||
from rate_limiter import RateLimiter
|
from rate_limiter import RateLimiter
|
||||||
|
from wordpress_xmlrpc.client import Client
|
||||||
|
from wordpress_xmlrpc.methods.media import UploadFile, NewPost
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -51,6 +53,17 @@ used_images = set()
|
|||||||
pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600) # 100 requests per hour
|
pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600) # 100 requests per hour
|
||||||
flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600) # 3600 requests per hour
|
flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600) # 3600 requests per hour
|
||||||
|
|
||||||
|
# Add file paths
|
||||||
|
FILE_PATHS = {
|
||||||
|
"posted_rss_titles": "/home/shane/foodie_automator/posted_rss_titles.json",
|
||||||
|
"posted_reddit_titles": "/home/shane/foodie_automator/posted_reddit_titles.json",
|
||||||
|
"used_images": "/home/shane/foodie_automator/used_images.json",
|
||||||
|
"recent_posts": "/home/shane/foodie_automator/recent_posts.json",
|
||||||
|
"x_post_counts": "/home/shane/foodie_automator/x_post_counts.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
USED_IMAGES_FILE = FILE_PATHS["used_images"]
|
||||||
|
|
||||||
def validate_json_entry(entry: Dict[str, Any]) -> bool:
|
def validate_json_entry(entry: Dict[str, Any]) -> bool:
|
||||||
"""Validate the structure of a JSON entry."""
|
"""Validate the structure of a JSON entry."""
|
||||||
required_fields = {"title", "timestamp"}
|
required_fields = {"title", "timestamp"}
|
||||||
@@ -133,7 +146,7 @@ def save_json_file(file_path, title, timestamp):
|
|||||||
|
|
||||||
def load_post_counts():
|
def load_post_counts():
|
||||||
counts = []
|
counts = []
|
||||||
filename = '/home/shane/foodie_automator/x_post_counts.json'
|
filename = FILE_PATHS["x_post_counts"]
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
try:
|
try:
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r') as f:
|
||||||
@@ -175,7 +188,7 @@ def load_post_counts():
|
|||||||
return counts
|
return counts
|
||||||
|
|
||||||
def save_post_counts(counts):
|
def save_post_counts(counts):
|
||||||
with open('/home/shane/foodie_automator/x_post_counts.json', 'w') as f:
|
with open(FILE_PATHS["x_post_counts"], 'w') as f:
|
||||||
for item in counts:
|
for item in counts:
|
||||||
json.dump(item, f)
|
json.dump(item, f)
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
@@ -471,92 +484,107 @@ def upload_image_to_wp(image_url: str, post_title: str, wp_base_url: str, wp_use
|
|||||||
logger.error(f"Image upload to WP failed for '{post_title}': {e}")
|
logger.error(f"Image upload to WP failed for '{post_title}': {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def post_to_wp(post_data: Dict[str, Any], category: str, link: str, author: Dict[str, str],
|
def post_to_wp(
|
||||||
image_url: Optional[str] = None, original_source: str = "",
|
post_data: Dict[str, Any],
|
||||||
image_source: str = "Pixabay", uploader: Optional[str] = None,
|
category: str,
|
||||||
pixabay_url: Optional[str] = None, interest_score: int = 4,
|
link: str,
|
||||||
post_id: Optional[int] = None, should_post_tweet: bool = True) -> Tuple[Optional[int], Optional[str]]:
|
author: Dict[str, str],
|
||||||
"""Post content to WordPress with improved error handling and validation."""
|
image_url: Optional[str] = None,
|
||||||
|
original_source: Optional[str] = None,
|
||||||
|
image_source: Optional[str] = None,
|
||||||
|
uploader: Optional[str] = None,
|
||||||
|
pixabay_url: Optional[str] = None,
|
||||||
|
interest_score: Optional[int] = None
|
||||||
|
) -> Tuple[Optional[int], Optional[str]]:
|
||||||
|
"""
|
||||||
|
Post content to WordPress with proper attribution and formatting.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
post_data: The post content and metadata
|
||||||
|
category: The post category
|
||||||
|
link: The original article link
|
||||||
|
author: The author information
|
||||||
|
image_url: Optional image URL
|
||||||
|
original_source: Optional original source name
|
||||||
|
image_source: Optional image source
|
||||||
|
uploader: Optional image uploader
|
||||||
|
pixabay_url: Optional Pixabay image URL
|
||||||
|
interest_score: Optional interest score
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (post_id, post_url) or (None, None) if failed
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Validate input data
|
# Load WordPress credentials from environment
|
||||||
if not isinstance(post_data, dict) or "title" not in post_data or "content" not in post_data:
|
wp_url = os.getenv('WORDPRESS_URL')
|
||||||
logger.error(f"Invalid post_data format: {post_data}")
|
wp_username = os.getenv('WORDPRESS_USERNAME')
|
||||||
|
wp_password = os.getenv('WORDPRESS_PASSWORD')
|
||||||
|
|
||||||
|
if not all([wp_url, wp_username, wp_password]):
|
||||||
|
logger.error("Missing WordPress credentials in environment variables")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
if not isinstance(author, dict) or "username" not in author or "password" not in author:
|
# Initialize WordPress API client
|
||||||
logger.error(f"Invalid author data: {author}")
|
wp = Client(
|
||||||
return None, None
|
wp_url,
|
||||||
|
wp_username,
|
||||||
|
wp_password
|
||||||
|
)
|
||||||
|
|
||||||
# Get category ID
|
# Upload featured image if provided
|
||||||
category_id = wp_api.get_category_id(category)
|
featured_image_id = None
|
||||||
if not category_id:
|
|
||||||
logger.error(f"Failed to get/create category '{category}'")
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
# Prepare tags
|
|
||||||
tags = [1] # Default tag
|
|
||||||
if interest_score >= 9:
|
|
||||||
picks_tag_id = wp_api.get_tag_id("Picks")
|
|
||||||
if picks_tag_id:
|
|
||||||
tags.append(picks_tag_id)
|
|
||||||
|
|
||||||
# Handle image upload
|
|
||||||
image_id = None
|
|
||||||
if image_url:
|
if image_url:
|
||||||
image_id = upload_image_to_wp(
|
try:
|
||||||
image_url, post_data["title"],
|
# Download image
|
||||||
wp_api.base_url, author["username"], author["password"],
|
response = requests.get(image_url, timeout=30)
|
||||||
image_source, uploader, pixabay_url
|
response.raise_for_status()
|
||||||
)
|
|
||||||
|
|
||||||
# Prepare post payload
|
# Create image filename
|
||||||
payload = {
|
image_filename = f"{post_data['title'].replace(' ', '_')}.jpg"
|
||||||
"title": post_data["title"],
|
|
||||||
"content": "\n".join(f"<p>{para}</p>" for para in post_data["content"].split('\n') if para.strip()),
|
# Upload to WordPress
|
||||||
"status": "publish",
|
media_data = {
|
||||||
"categories": [category_id],
|
'file': (image_filename, response.content, 'image/jpeg'),
|
||||||
"tags": tags,
|
'title': post_data['title'],
|
||||||
"author": author.get("id", 5), # Default author ID
|
'caption': f"Image source: {image_source}\nUploader: {uploader}\nURL: {pixabay_url}" if image_source else None
|
||||||
"meta": {
|
}
|
||||||
"original_link": link,
|
|
||||||
"original_source": original_source,
|
media = wp.call(UploadFile(media_data))
|
||||||
"interest_score": interest_score
|
featured_image_id = media['id']
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to upload image '{image_filename}' to WordPress: {e}")
|
||||||
|
# Continue without image
|
||||||
|
|
||||||
|
# Prepare post data
|
||||||
|
post = {
|
||||||
|
'title': post_data['title'],
|
||||||
|
'content': post_data['content'],
|
||||||
|
'status': 'publish',
|
||||||
|
'categories': [category],
|
||||||
|
'author': author['id'],
|
||||||
|
'featured_media': featured_image_id,
|
||||||
|
'meta': {
|
||||||
|
'original_source': original_source,
|
||||||
|
'original_link': link,
|
||||||
|
'interest_score': interest_score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if image_id:
|
# Create post
|
||||||
payload["featured_media"] = image_id
|
result = wp.call(NewPost(post))
|
||||||
|
|
||||||
# Create or update post
|
if result and 'id' in result:
|
||||||
endpoint = f"posts/{post_id}" if post_id else "posts"
|
post_id = result['id']
|
||||||
method = "POST" if not post_id else "PUT"
|
post_url = f"{wp_url}/?p={post_id}"
|
||||||
|
logger.info(f"Successfully posted to WordPress (ID: {post_id})")
|
||||||
|
return post_id, post_url
|
||||||
|
|
||||||
response = wp_api._make_request(method, endpoint, json=payload)
|
logger.error("Failed to create WordPress post")
|
||||||
if not response:
|
return None, None
|
||||||
logger.error("Failed to create/update WordPress post")
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
post_id = response["id"]
|
|
||||||
post_url = response["link"]
|
|
||||||
|
|
||||||
# Save to recent posts
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
|
||||||
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
|
|
||||||
|
|
||||||
# Post tweet if requested
|
|
||||||
if should_post_tweet:
|
|
||||||
try:
|
|
||||||
post = {"title": post_data["title"], "url": post_url}
|
|
||||||
tweet = generate_article_tweet(author, post, author.get("persona", "Foodie Critic"))
|
|
||||||
if post_tweet(author, tweet):
|
|
||||||
logger.info(f"Successfully posted article tweet for {author['username']}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error posting article tweet: {e}")
|
|
||||||
|
|
||||||
logger.info(f"Successfully posted/updated '{post_data['title']}' (ID: {post_id})")
|
|
||||||
return post_id, post_url
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"WordPress posting failed: {e}")
|
logger.error(f"WordPress API request failed: {e}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def determine_paragraph_count(interest_score):
|
def determine_paragraph_count(interest_score):
|
||||||
@@ -838,7 +866,7 @@ def prepare_post_data(final_summary, original_title, context_info=""):
|
|||||||
|
|
||||||
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
||||||
try:
|
try:
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
|
recent_posts = load_json_file(FILE_PATHS["recent_posts"], 24)
|
||||||
entry = {
|
entry = {
|
||||||
"title": post_title,
|
"title": post_title,
|
||||||
"url": post_url,
|
"url": post_url,
|
||||||
@@ -846,7 +874,7 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
|||||||
"timestamp": timestamp
|
"timestamp": timestamp
|
||||||
}
|
}
|
||||||
recent_posts.append(entry)
|
recent_posts.append(entry)
|
||||||
with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
|
with open(FILE_PATHS["recent_posts"], 'w') as f:
|
||||||
for item in recent_posts:
|
for item in recent_posts:
|
||||||
json.dump(item, f)
|
json.dump(item, f)
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
@@ -857,9 +885,9 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
|||||||
def prune_recent_posts():
|
def prune_recent_posts():
|
||||||
try:
|
try:
|
||||||
cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
|
cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
|
recent_posts = load_json_file(FILE_PATHS["recent_posts"], 24)
|
||||||
recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff]
|
recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff]
|
||||||
with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
|
with open(FILE_PATHS["recent_posts"], 'w') as f:
|
||||||
for item in recent_posts:
|
for item in recent_posts:
|
||||||
json.dump(item, f)
|
json.dump(item, f)
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
@@ -995,60 +1023,75 @@ def generate_image_query(title: str, summary: str) -> Tuple[str, List[str], bool
|
|||||||
logger.warning(f"Image query generation failed: {e}. Using title as fallback.")
|
logger.warning(f"Image query generation failed: {e}. Using title as fallback.")
|
||||||
return title, [], True
|
return title, [], True
|
||||||
|
|
||||||
def smart_image_and_filter(title: str, summary: str) -> Tuple[str, List[str], bool]:
|
def smart_image_and_filter(title: str, content: str) -> Tuple[str, List[str], bool]:
|
||||||
"""Smart image filtering with improved error handling."""
|
"""
|
||||||
|
Generate an image query and determine if the content should be filtered.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
title: The article title
|
||||||
|
content: The article content
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (image_query, relevance_keywords, should_skip)
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
content = f"{title}\n\n{summary}"
|
# Prepare prompt for GPT
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this food-related content and determine:
|
||||||
|
1. A good image search query
|
||||||
|
2. Relevant keywords
|
||||||
|
3. Whether to skip this content
|
||||||
|
|
||||||
prompt = (
|
Title: {title}
|
||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
Content: {content}
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
|
||||||
"otherwise focus on the main theme. "
|
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., {\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"action\": \"KEEP\" or \"SKIP\"})."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
Return a JSON object with:
|
||||||
|
- image_query: A concise search query for finding relevant images
|
||||||
|
- relevance: List of relevant keywords
|
||||||
|
- action: Either "KEEP" or "SKIP"
|
||||||
|
|
||||||
|
Keep content that is:
|
||||||
|
- About food trends, innovations, or interesting culinary topics
|
||||||
|
- Has broad appeal to food enthusiasts
|
||||||
|
- Contains unique or noteworthy information
|
||||||
|
|
||||||
|
Skip content that is:
|
||||||
|
- Basic recipes or cooking instructions
|
||||||
|
- Restaurant reviews or menu items
|
||||||
|
- Generic food news without unique angles
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get response from GPT
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model=LIGHT_TASK_MODEL,
|
model="gpt-4",
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": prompt},
|
{"role": "system", "content": "You are a food content curator."},
|
||||||
{"role": "user", "content": content}
|
{"role": "user", "content": prompt}
|
||||||
],
|
],
|
||||||
max_tokens=100
|
temperature=0.7,
|
||||||
|
max_tokens=150
|
||||||
)
|
)
|
||||||
raw_result = response.choices[0].message.content.strip()
|
|
||||||
logger.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
|
||||||
|
|
||||||
# Remove ```json markers and fix single quotes in JSON structure
|
|
||||||
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
|
||||||
# Replace single quotes with double quotes, but preserve single quotes within string values
|
|
||||||
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
|
||||||
|
|
||||||
|
# Parse response
|
||||||
try:
|
try:
|
||||||
result = json.loads(fixed_result)
|
result = json.loads(response.choices[0].message.content)
|
||||||
|
image_query = result.get("image_query", "")
|
||||||
|
relevance = result.get("relevance", [])
|
||||||
|
action = result.get("action", "KEEP")
|
||||||
|
|
||||||
|
logger.info(f"Raw GPT smart image/filter response: '{response.choices[0].message.content}'")
|
||||||
|
logger.info(f"Smart image query: {image_query}, Relevance: {relevance}, Skip: {action == 'SKIP'}")
|
||||||
|
|
||||||
|
return image_query, relevance, action == "SKIP"
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
logger.warning(f"JSON parsing failed: {e}, raw: '{response.choices[0].message.content}'. Using fallback.")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
# Fallback to basic filtering
|
||||||
|
return title, [], "recipe" in title.lower() or "how to" in title.lower()
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
|
||||||
logger.warning(f"Invalid GPT response format: {result}, using fallback")
|
|
||||||
return "food trends", ["cuisine", "dining"], False
|
|
||||||
|
|
||||||
image_query = result["image_query"]
|
|
||||||
relevance_keywords = result["relevance"]
|
|
||||||
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
|
||||||
|
|
||||||
logger.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
|
||||||
|
|
||||||
if not image_query or len(image_query.split()) < 2:
|
|
||||||
logger.warning(f"Image query '{image_query}' too vague, using fallback")
|
|
||||||
return "food trends", ["cuisine", "dining"], skip_flag
|
|
||||||
|
|
||||||
return image_query, relevance_keywords, skip_flag
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Smart image/filter failed: {e}, using fallback")
|
logger.error(f"Error in smart image/filter: {e}")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
return title, [], False
|
||||||
|
|
||||||
def classify_keywords(keywords):
|
def classify_keywords(keywords):
|
||||||
prompt = (
|
prompt = (
|
||||||
@@ -1159,6 +1202,9 @@ def get_flickr_image(search_query: str, relevance_keywords: List[str] = None) ->
|
|||||||
|
|
||||||
# Get photo info for attribution
|
# Get photo info for attribution
|
||||||
info = photo.getInfo()
|
info = photo.getInfo()
|
||||||
|
if not hasattr(info, 'owner') or not hasattr(info.owner, 'username'):
|
||||||
|
continue
|
||||||
|
|
||||||
uploader = info.owner.username
|
uploader = info.owner.username
|
||||||
page_url = f"https://www.flickr.com/photos/{info.owner.id}/{photo_id}"
|
page_url = f"https://www.flickr.com/photos/{info.owner.id}/{photo_id}"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user