@@ -70,48 +70,84 @@ MAX_RETRIES = 3
RETRY_BACKOFF = 2

def setup_logging(): |
    try:
        # Ensure log directory exists
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
        logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")

        # Check write permissions
        if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
            raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")

        # Test write to log file
        try:
            with open(LOG_FILE, 'a') as f:
                f.write("")
            logging.debug(f"Confirmed write access to {LOG_FILE}")
        except Exception as e:
            raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")

        # Prune old logs
        if os.path.exists(LOG_FILE):
            with open(LOG_FILE, 'r') as f:
                lines = f.readlines()

            log_entries = []
            current_entry = []
            timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
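
            # Group raw lines into whole log entries: a line that starts with a
            # timestamp opens a new entry, and continuation lines (e.g. traceback
            # frames) are appended to the entry they follow.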
            for line in lines:
                if timestamp_pattern.match(line):
                    if current_entry:
                        log_entries.append(''.join(current_entry))
                    current_entry = [line]
                else:
                    current_entry.append(line)

            if current_entry:
                log_entries.append(''.join(current_entry))
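
            # Keep only entries newer than LOG_PRUNE_DAYS days; entries whose first 19
            # characters do not parse as a timestamp are warned about and dropped.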
            cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
            pruned_entries = []
            for entry in log_entries:
                try:
                    timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
                    if timestamp > cutoff:
                        pruned_entries.append(entry)
                except ValueError:
                    logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
                    continue

            with open(LOG_FILE, 'w') as f:
                f.writelines(pruned_entries)
            logging.debug(f"Log file pruned: {LOG_FILE}")

        # Configure logging
        logging.basicConfig(
            filename=LOG_FILE,
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            force=True  # Ensure this config takes precedence
        )
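        # force=True (Python 3.8+) clears any handlers already attached to the root
        # logger, so only this file handler and the console handler added below remain.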
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logging.getLogger().addHandler(console_handler)
        logging.info("Logging initialized for foodie_automator_google.py")
    except Exception as e:
        # Fallback to console logging if file logging fails
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            force=True
        )
        logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
        logging.info("Console logging initialized as fallback for foodie_automator_google.py")
|
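# The OpenAI client reads OPENAI_API_KEY from the environment; the key is assumed
# to be set before this module runs.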
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) |
@@ -253,11 +289,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
    try:
        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")

        # Define regions to scrape
        regions = ['US', 'GB', 'AU']
        all_trends = []

        # Scrape trends for each region
        for geo in regions:
            logging.info(f"Scraping Google Trends for geo={geo}")
            trends = scrape_google_trends(geo=geo)
@@ -267,7 +301,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
            else:
                logging.warning(f"No trends collected for geo={geo}")

        # Remove duplicates by title and sort by search volume
        unique_trends = []
        seen_titles = set()
        for trend in all_trends:
@@ -277,10 +310,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
        if not unique_trends:
            logging.info("No Google Trends data available across regions")
            sleep_time = random.randint(1200, 1800)  # 20–30 minutes
            return None, None, sleep_time

        # Sort trends by search volume in descending order
        unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
        logging.info(f"Total unique trends collected: {len(unique_trends)}")
@@ -293,14 +325,13 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
            summary = trend.get("summary", "")
            source_name = trend.get("source", "Google Trends")
            original_source = f'<a href="{link}">{source_name}</a>'
            original_url = link  # Store for fallback

            if title in posted_titles:
                logging.info(f"Skipping already posted trend: {title}")
                attempts += 1
                continue

            # Check author availability before GPT calls
            author = get_next_author_round_robin()
            if not author:
                logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
@@ -312,8 +343,12 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
            logging.info(f"Trying Google Trend: {title} from {source_name}")

            # Fetch DuckDuckGo context early to enhance smart_image_and_filter
            ddg_context = fetch_duckduckgo_news_context(title)
            enhanced_summary = (summary + "\n\nAdditional Context: " + ddg_context) if summary else ddg_context
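            # The enriched summary gives smart_image_and_filter more than the bare trend
            # title to work with when picking an image query and relevance keywords.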

            try:
                image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
            except Exception as e:
                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                attempts += 1
@@ -324,7 +359,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                attempts += 1
                continue
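
            # Score interest on the trend summary plus the DuckDuckGo context fetched above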
            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
            interest_score = is_interesting(scoring_content)
            logging.info(f"Interest score for '{title}': {interest_score}")
@@ -405,11 +439,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                )
                if not post_id:
                    logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
                    post_url = original_url  # Fallback to original trend URL
                else:
                    logging.info(f"Posted to WordPress for {author_username}: {post_url}")

                    # Update post with actual post_url
                    post_url_encoded = quote(post_url)
                    share_links = share_links_template.format(post_url=post_url_encoded)
                    post_data["content"] = f"{final_summary}\n\n{share_links}"
@@ -420,7 +453,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                        category=category,
                        link=link,
                        author=author,
                        image_url=None,  # Skip image re-upload
                        original_source=original_source,
                        image_source=image_source,
                        uploader=uploader,
@@ -431,7 +464,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                    )
            except Exception as e:
                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                post_url = original_url  # Fallback to original trend URL
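            # The finally block always clears the posting flag, even if the WordPress call fails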
            finally:
                is_posting = False
@@ -446,15 +479,15 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
            logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")

            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
            sleep_time = random.randint(1200, 1800)  # 20–30 minutes
            return post_data, category, sleep_time

        logging.info("No interesting Google Trend found after attempts")
        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
        return None, None, sleep_time
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
        return None, None, sleep_time

def run_google_trends_automator():