update reddit
This commit is contained in:
@@ -28,7 +28,9 @@ from foodie_utils import (
|
|||||||
prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
|
prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, select_best_cta
|
from foodie_hooks import get_dynamic_hook, select_best_cta
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
# Flag to indicate if we're in the middle of posting
|
# Flag to indicate if we're in the middle of posting
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
@@ -53,7 +55,7 @@ console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(
|
|||||||
logger.addHandler(console_handler)
|
logger.addHandler(console_handler)
|
||||||
logging.info("Logging initialized for foodie_automator_google.py")
|
logging.info("Logging initialized for foodie_automator_google.py")
|
||||||
|
|
||||||
client = OpenAI(api_key="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")
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
|
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
|
||||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||||
|
|||||||
+24
-12
@@ -6,6 +6,7 @@ import os
|
|||||||
import json
|
import json
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
@@ -83,7 +84,14 @@ posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in
|
|||||||
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
||||||
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
||||||
|
|
||||||
client = OpenAI(api_key="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")
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
def clean_reddit_title(title):
    """Remove leading Reddit flair tags such as [pro/chef] or [homemade].

    Every consecutive bracketed tag at the start of the title is removed,
    so "[homemade] [OC] Pizza" becomes "Pizza". Leading whitespace in front
    of the first tag is tolerated. Bracketed text that appears later in the
    title is left untouched.

    Args:
        title: The raw submission title.

    Returns:
        The title without its leading flair tags and without surrounding
        whitespace.
    """
    # `^\s*` lets a tag match even when the raw title starts with spaces;
    # the repeated group consumes several tags in a row instead of only the
    # first one.
    cleaned_title = re.sub(r'^\s*(?:\[[^\]]*\]\s*)+', '', title).strip()
    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
    return cleaned_title
|
||||||
|
|
||||||
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
|
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
|
||||||
try:
|
try:
|
||||||
@@ -160,8 +168,11 @@ def fetch_reddit_posts():
|
|||||||
if pub_date < cutoff_date:
|
if pub_date < cutoff_date:
|
||||||
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
|
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
|
||||||
continue
|
continue
|
||||||
|
# Clean the title before storing
|
||||||
|
cleaned_title = clean_reddit_title(submission.title)
|
||||||
articles.append({
|
articles.append({
|
||||||
"title": submission.title,
|
"title": cleaned_title, # Use cleaned title
|
||||||
|
"raw_title": submission.title, # Store raw title for reference if needed
|
||||||
"link": f"https://www.reddit.com{submission.permalink}",
|
"link": f"https://www.reddit.com{submission.permalink}",
|
||||||
"summary": submission.selftext,
|
"summary": submission.selftext,
|
||||||
"feed_title": get_clean_source_name(subreddit_name),
|
"feed_title": get_clean_source_name(subreddit_name),
|
||||||
@@ -196,15 +207,16 @@ def curate_from_reddit():
|
|||||||
max_attempts = 10
|
max_attempts = 10
|
||||||
while attempts < max_attempts and articles:
|
while attempts < max_attempts and articles:
|
||||||
article = articles.pop(0) # Take highest-upvote post
|
article = articles.pop(0) # Take highest-upvote post
|
||||||
title = article["title"]
|
title = article["title"] # Use cleaned title
|
||||||
|
raw_title = article["raw_title"] # Use raw title for deduplication
|
||||||
link = article["link"]
|
link = article["link"]
|
||||||
summary = article["summary"]
|
summary = article["summary"]
|
||||||
source_name = "Reddit"
|
source_name = "Reddit"
|
||||||
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
||||||
|
|
||||||
if title in posted_titles:
|
if raw_title in posted_titles: # Check against raw title
|
||||||
print(f"Skipping already posted post: {title}")
|
print(f"Skipping already posted post: {raw_title}")
|
||||||
logging.info(f"Skipping already posted post: {title}")
|
logging.info(f"Skipping already posted post: {raw_title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -220,7 +232,7 @@ def curate_from_reddit():
|
|||||||
|
|
||||||
top_comments = get_top_comments(link, reddit, limit=3)
|
top_comments = get_top_comments(link, reddit, limit=3)
|
||||||
interest_score = is_interesting_reddit(
|
interest_score = is_interesting_reddit(
|
||||||
title,
|
title, # Use cleaned title
|
||||||
summary,
|
summary,
|
||||||
article["upvotes"],
|
article["upvotes"],
|
||||||
article["comment_count"],
|
article["comment_count"],
|
||||||
@@ -241,7 +253,7 @@ def curate_from_reddit():
|
|||||||
"Do NOT introduce unrelated concepts unless in the content or comments. "
|
"Do NOT introduce unrelated concepts unless in the content or comments. "
|
||||||
"If brief, expand on the core idea with relevant context about its appeal or significance."
|
"If brief, expand on the core idea with relevant context about its appeal or significance."
|
||||||
)
|
)
|
||||||
content_to_summarize = f"{title}\n\n{summary}"
|
content_to_summarize = f"{title}\n\n{summary}" # Use cleaned title
|
||||||
if top_comments:
|
if top_comments:
|
||||||
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
|
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
|
||||||
|
|
||||||
@@ -259,7 +271,7 @@ def curate_from_reddit():
|
|||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) # Use cleaned title
|
||||||
if not post_data:
|
if not post_data:
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
@@ -314,9 +326,9 @@ def curate_from_reddit():
|
|||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) # Save raw title
|
||||||
posted_titles.add(title)
|
posted_titles.add(raw_title) # Add raw title to set
|
||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ from foodie_hooks import get_dynamic_hook, select_best_cta
|
|||||||
import feedparser
|
import feedparser
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
# Flag to indicate if we're in the middle of posting
|
# Flag to indicate if we're in the middle of posting
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
|
|||||||
+4
-2
@@ -10,6 +10,8 @@ import io
|
|||||||
import tempfile
|
import tempfile
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
@@ -21,8 +23,8 @@ from foodie_config import (
|
|||||||
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS,
|
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS,
|
||||||
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL
|
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL
|
||||||
)
|
)
|
||||||
#test
|
load_dotenv()
|
||||||
client = OpenAI(api_key="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")
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
def load_json_file(filename, expiration_days=None):
|
def load_json_file(filename, expiration_days=None):
|
||||||
data = []
|
data = []
|
||||||
|
|||||||
Reference in New Issue
Block a user