From 504d7f6349ed3b05dae6818138b5c1c175c2a01c Mon Sep 17 00:00:00 2001 From: Shane Date: Sun, 4 May 2025 12:57:22 +1000 Subject: [PATCH] fix --- foodie_automator_google.py | 6 +++--- foodie_automator_reddit.py | 6 +++--- foodie_automator_rss.py | 6 +++--- foodie_utils.py | 25 +++++-------------------- 4 files changed, 14 insertions(+), 29 deletions(-) diff --git a/foodie_automator_google.py b/foodie_automator_google.py index cdd735c..804fdfc 100644 --- a/foodie_automator_google.py +++ b/foodie_automator_google.py @@ -249,7 +249,7 @@ def curate_from_google_trends(geo_list=['US']): final_summary = insert_link_naturally(final_summary, source_name, link) - post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title, main_topic) + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) if not post_data: attempts += 1 continue @@ -280,7 +280,7 @@ def curate_from_google_trends(geo_list=['US']): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, should_post_tweet=True ) @@ -304,7 +304,7 @@ def curate_from_google_trends(geo_list=['US']): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, post_id=post_id, should_post_tweet=False diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index 0297b07..cf27c45 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -315,7 +315,7 @@ def curate_from_reddit(): final_summary = insert_link_naturally(final_summary, source_name, link) - post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title, main_topic) + post_data, author, category, image_url, image_source, uploader, page_url = 
prepare_post_data(final_summary, title, main_topic) if not post_data: attempts += 1 continue @@ -346,7 +346,7 @@ def curate_from_reddit(): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, should_post_tweet=True ) @@ -370,7 +370,7 @@ def curate_from_reddit(): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, post_id=post_id, should_post_tweet=False diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 731c197..824a956 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -276,7 +276,7 @@ def curate_from_rss(): final_summary = insert_link_naturally(final_summary, source_name, link) - post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title, main_topic) + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) if not post_data: attempts += 1 continue @@ -307,7 +307,7 @@ def curate_from_rss(): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, should_post_tweet=True ) @@ -331,7 +331,7 @@ def curate_from_rss(): original_source=original_source, image_source=image_source, uploader=uploader, - pixabay_url=pixabay_url, + page_url=page_url, interest_score=interest_score, post_id=post_id, should_post_tweet=False diff --git a/foodie_utils.py b/foodie_utils.py index 57d6529..3df3367 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -420,12 +420,11 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw } logging.info(f"Fetching image from {image_url} for '{post_title}'") - # Add rate limit handling for image download for attempt in range(3): try: image_response = 
requests.get(image_url, headers=image_headers, timeout=10) if image_response.status_code == 429: - wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s + wait_time = 10 * (2 ** attempt) logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") time.sleep(wait_time) continue @@ -450,7 +449,6 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw response.raise_for_status() image_id = response.json()["id"] - # Always include a clickable link and uploader if available if page_url and uploader: caption = f'{image_source} by {uploader}' elif page_url: @@ -590,13 +588,11 @@ def insert_link_naturally(summary, source_name, source_url): try: logging.info(f"Input summary to insert_link_naturally: {summary!r}") - # Split the summary into paragraphs paragraphs = summary.split('\n') if not paragraphs or all(not p.strip() for p in paragraphs): logging.error("No valid paragraphs to insert link.") return summary - # Choose a paragraph with at least two sentences eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] if not eligible_paragraphs: logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") @@ -608,29 +604,23 @@ def insert_link_naturally(summary, source_name, source_url): logging.info(f"Appended link to summary: {new_summary!r}") return new_summary - # Select a random eligible paragraph target_para = random.choice(eligible_paragraphs) sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) - # Find a sentence to insert the link into (prefer mid-paragraph sentences) - eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] # Exclude the last sentence + eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] if not eligible_sentences: - eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] # Fall back 
to any sentence + eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] sentence_idx, sentence = random.choice(eligible_sentences) link_pattern = f'<a href="{source_url}">{source_name}</a>' - # Split the sentence into words and insert the link naturally words = sentence.split() - if len(words) < 3: # Ensure the sentence has enough words to split - # If the sentence is too short, append the attribution + if len(words) < 3: new_sentence = f"{sentence} according to {link_pattern}." else: - # Insert the link mid-sentence by splitting at a random point - split_point = random.randint(1, len(words)-2) # Avoid splitting at the very start or end + split_point = random.randint(1, len(words)-2) new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}" - # Reconstruct the paragraph with the modified sentence sentences[sentence_idx] = new_sentence new_para = ' '.join(sentences) paragraphs[paragraphs.index(target_para)] = new_para @@ -641,7 +631,6 @@ except Exception as e: logging.error(f"Link insertion failed: {e}") - # Fallback: append the link to the end of the summary link_pattern = f'<a href="{source_url}">{source_name}</a>' new_summary = f"{summary}\n\nSource: {link_pattern}." logging.info(f"Fallback summary with link: {new_summary!r}") @@ -760,7 +749,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im content = "Content unavailable. Check the original source for details." formatted_content = "\n".join(f"
<p>{para}</p>
" for para in content.split('\n') if para.strip()) - # Append image attribution to the content to ensure visibility if image_url and image_source: attribution = f"Image Source: {image_source}" if page_url and uploader: @@ -779,7 +767,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im } author_id = author_id_map.get(author["username"], 5) - # Handle image upload image_id = None if image_url: logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") @@ -828,11 +815,9 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im post_id = post_info["id"] post_url = post_info["link"] - # Save to recent_posts.json timestamp = datetime.now(timezone.utc).isoformat() save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) - # Post article tweet to X only if should_post_tweet is True if should_post_tweet: try: post = {"title": post_data["title"], "url": post_url}