fix better images
This commit is contained in:
+87
-54
@@ -70,48 +70,84 @@ MAX_RETRIES = 3
|
||||
RETRY_BACKOFF = 2
|
||||
|
||||
def setup_logging():
|
||||
if os.path.exists(LOG_FILE):
|
||||
with open(LOG_FILE, 'r') as f:
|
||||
lines = f.readlines()
|
||||
try:
|
||||
# Ensure log directory exists
|
||||
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
||||
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
|
||||
|
||||
log_entries = []
|
||||
current_entry = []
|
||||
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
|
||||
# Check write permissions
|
||||
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
|
||||
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
|
||||
|
||||
for line in lines:
|
||||
if timestamp_pattern.match(line):
|
||||
if current_entry:
|
||||
log_entries.append(''.join(current_entry))
|
||||
current_entry = [line]
|
||||
else:
|
||||
current_entry.append(line)
|
||||
# Test write to log file
|
||||
try:
|
||||
with open(LOG_FILE, 'a') as f:
|
||||
f.write("")
|
||||
logging.debug(f"Confirmed write access to {LOG_FILE}")
|
||||
except Exception as e:
|
||||
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
|
||||
|
||||
# Prune old logs
|
||||
if os.path.exists(LOG_FILE):
|
||||
with open(LOG_FILE, 'r') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
log_entries = []
|
||||
current_entry = []
|
||||
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
|
||||
|
||||
for line in lines:
|
||||
if timestamp_pattern.match(line):
|
||||
if current_entry:
|
||||
log_entries.append(''.join(current_entry))
|
||||
current_entry = [line]
|
||||
else:
|
||||
current_entry.append(line)
|
||||
|
||||
if current_entry:
|
||||
log_entries.append(''.join(current_entry))
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||
pruned_entries = []
|
||||
for entry in log_entries:
|
||||
try:
|
||||
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||
if timestamp > cutoff:
|
||||
pruned_entries.append(entry)
|
||||
except ValueError:
|
||||
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
|
||||
continue
|
||||
|
||||
with open(LOG_FILE, 'w') as f:
|
||||
f.writelines(pruned_entries)
|
||||
logging.debug(f"Log file pruned: {LOG_FILE}")
|
||||
|
||||
if current_entry:
|
||||
log_entries.append(''.join(current_entry))
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||
pruned_entries = []
|
||||
for entry in log_entries:
|
||||
try:
|
||||
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||
if timestamp > cutoff:
|
||||
pruned_entries.append(entry)
|
||||
except ValueError:
|
||||
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
|
||||
continue
|
||||
|
||||
with open(LOG_FILE, 'w') as f:
|
||||
f.writelines(pruned_entries)
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
filename=LOG_FILE,
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
force=True # Ensure this config takes precedence
|
||||
)
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||
logging.getLogger().addHandler(console_handler)
|
||||
logging.info("Logging initialized for foodie_automator_google.py")
|
||||
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.INFO)
|
||||
file_handler = logging.FileHandler(LOG_FILE, mode='a')
|
||||
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||
logger.addHandler(file_handler)
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||
logger.addHandler(console_handler)
|
||||
logging.info("Logging initialized for foodie_automator_google.py")
|
||||
except Exception as e:
|
||||
# Fallback to console logging if file logging fails
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
force=True
|
||||
)
|
||||
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||
logging.getLogger().addHandler(console_handler)
|
||||
logging.info("Console logging initialized as fallback for foodie_automator_google.py")
|
||||
|
||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
@@ -253,11 +289,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
try:
|
||||
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
|
||||
|
||||
# Define regions to scrape
|
||||
regions = ['US', 'GB', 'AU']
|
||||
all_trends = []
|
||||
|
||||
# Scrape trends for each region
|
||||
for geo in regions:
|
||||
logging.info(f"Scraping Google Trends for geo={geo}")
|
||||
trends = scrape_google_trends(geo=geo)
|
||||
@@ -267,7 +301,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
else:
|
||||
logging.warning(f"No trends collected for geo={geo}")
|
||||
|
||||
# Remove duplicates by title and sort by search volume
|
||||
unique_trends = []
|
||||
seen_titles = set()
|
||||
for trend in all_trends:
|
||||
@@ -277,10 +310,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
|
||||
if not unique_trends:
|
||||
logging.info("No Google Trends data available across regions")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
|
||||
# Sort trends by search volume in descending order
|
||||
unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
|
||||
logging.info(f"Total unique trends collected: {len(unique_trends)}")
|
||||
|
||||
@@ -293,14 +325,13 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
summary = trend.get("summary", "")
|
||||
source_name = trend.get("source", "Google Trends")
|
||||
original_source = f'<a href="{link}">{source_name}</a>'
|
||||
original_url = link # Store for fallback
|
||||
original_url = link
|
||||
|
||||
if title in posted_titles:
|
||||
logging.info(f"Skipping already posted trend: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Check author availability before GPT calls
|
||||
author = get_next_author_round_robin()
|
||||
if not author:
|
||||
logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
|
||||
@@ -312,8 +343,12 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
|
||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||
|
||||
# Fetch DuckDuckGo context early to enhance smart_image_and_filter
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
@@ -324,7 +359,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
@@ -405,11 +439,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
)
|
||||
if not post_id:
|
||||
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
|
||||
post_url = original_url # Fallback to original trend URL
|
||||
post_url = original_url
|
||||
else:
|
||||
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
|
||||
|
||||
# Update post with actual post_url
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
@@ -420,7 +453,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None, # Skip image re-upload
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
@@ -431,7 +464,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||
post_url = original_url # Fallback to original trend URL
|
||||
post_url = original_url
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
@@ -446,15 +479,15 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return post_data, category, sleep_time
|
||||
|
||||
logging.info("No interesting Google Trend found after attempts")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
|
||||
def run_google_trends_automator():
|
||||
|
||||
Reference in New Issue
Block a user