|
|
|
@ -420,12 +420,11 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw |
|
|
|
} |
|
|
|
} |
|
|
|
logging.info(f"Fetching image from {image_url} for '{post_title}'") |
|
|
|
logging.info(f"Fetching image from {image_url} for '{post_title}'") |
|
|
|
|
|
|
|
|
|
|
|
# Add rate limit handling for image download |
|
|
|
|
|
|
|
for attempt in range(3): |
|
|
|
for attempt in range(3): |
|
|
|
try: |
|
|
|
try: |
|
|
|
image_response = requests.get(image_url, headers=image_headers, timeout=10) |
|
|
|
image_response = requests.get(image_url, headers=image_headers, timeout=10) |
|
|
|
if image_response.status_code == 429: |
|
|
|
if image_response.status_code == 429: |
|
|
|
wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s |
|
|
|
wait_time = 10 * (2 ** attempt) |
|
|
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") |
|
|
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") |
|
|
|
time.sleep(wait_time) |
|
|
|
time.sleep(wait_time) |
|
|
|
continue |
|
|
|
continue |
|
|
|
@ -450,7 +449,6 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw |
|
|
|
response.raise_for_status() |
|
|
|
response.raise_for_status() |
|
|
|
|
|
|
|
|
|
|
|
image_id = response.json()["id"] |
|
|
|
image_id = response.json()["id"] |
|
|
|
# Always include a clickable link and uploader if available |
|
|
|
|
|
|
|
if page_url and uploader: |
|
|
|
if page_url and uploader: |
|
|
|
caption = f'<a href="{page_url}">{image_source}</a> by {uploader}' |
|
|
|
caption = f'<a href="{page_url}">{image_source}</a> by {uploader}' |
|
|
|
elif page_url: |
|
|
|
elif page_url: |
|
|
|
@ -590,13 +588,11 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
try: |
|
|
|
try: |
|
|
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}") |
|
|
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}") |
|
|
|
|
|
|
|
|
|
|
|
# Split the summary into paragraphs |
|
|
|
|
|
|
|
paragraphs = summary.split('\n') |
|
|
|
paragraphs = summary.split('\n') |
|
|
|
if not paragraphs or all(not p.strip() for p in paragraphs): |
|
|
|
if not paragraphs or all(not p.strip() for p in paragraphs): |
|
|
|
logging.error("No valid paragraphs to insert link.") |
|
|
|
logging.error("No valid paragraphs to insert link.") |
|
|
|
return summary |
|
|
|
return summary |
|
|
|
|
|
|
|
|
|
|
|
# Choose a paragraph with at least two sentences |
|
|
|
|
|
|
|
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] |
|
|
|
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] |
|
|
|
if not eligible_paragraphs: |
|
|
|
if not eligible_paragraphs: |
|
|
|
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") |
|
|
|
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") |
|
|
|
@ -608,29 +604,23 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
logging.info(f"Appended link to summary: {new_summary!r}") |
|
|
|
logging.info(f"Appended link to summary: {new_summary!r}") |
|
|
|
return new_summary |
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
|
|
# Select a random eligible paragraph |
|
|
|
|
|
|
|
target_para = random.choice(eligible_paragraphs) |
|
|
|
target_para = random.choice(eligible_paragraphs) |
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) |
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) |
|
|
|
|
|
|
|
|
|
|
|
# Find a sentence to insert the link into (prefer mid-paragraph sentences) |
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] |
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] # Exclude the last sentence |
|
|
|
|
|
|
|
if not eligible_sentences: |
|
|
|
if not eligible_sentences: |
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] # Fall back to any sentence |
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] |
|
|
|
|
|
|
|
|
|
|
|
sentence_idx, sentence = random.choice(eligible_sentences) |
|
|
|
sentence_idx, sentence = random.choice(eligible_sentences) |
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
|
|
|
|
|
|
|
# Split the sentence into words and insert the link naturally |
|
|
|
|
|
|
|
words = sentence.split() |
|
|
|
words = sentence.split() |
|
|
|
if len(words) < 3: # Ensure the sentence has enough words to split |
|
|
|
if len(words) < 3: |
|
|
|
# If the sentence is too short, append the attribution |
|
|
|
|
|
|
|
new_sentence = f"{sentence} according to {link_pattern}." |
|
|
|
new_sentence = f"{sentence} according to {link_pattern}." |
|
|
|
else: |
|
|
|
else: |
|
|
|
# Insert the link mid-sentence by splitting at a random point |
|
|
|
split_point = random.randint(1, len(words)-2) |
|
|
|
split_point = random.randint(1, len(words)-2) # Avoid splitting at the very start or end |
|
|
|
|
|
|
|
new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}" |
|
|
|
new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}" |
|
|
|
|
|
|
|
|
|
|
|
# Reconstruct the paragraph with the modified sentence |
|
|
|
|
|
|
|
sentences[sentence_idx] = new_sentence |
|
|
|
sentences[sentence_idx] = new_sentence |
|
|
|
new_para = ' '.join(sentences) |
|
|
|
new_para = ' '.join(sentences) |
|
|
|
paragraphs[paragraphs.index(target_para)] = new_para |
|
|
|
paragraphs[paragraphs.index(target_para)] = new_para |
|
|
|
@ -641,7 +631,6 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
# Fallback: append the link to the end of the summary |
|
|
|
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
new_summary = f"{summary}\n\nSource: {link_pattern}." |
|
|
|
new_summary = f"{summary}\n\nSource: {link_pattern}." |
|
|
|
logging.info(f"Fallback summary with link: {new_summary!r}") |
|
|
|
logging.info(f"Fallback summary with link: {new_summary!r}") |
|
|
|
@ -760,7 +749,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im |
|
|
|
content = "Content unavailable. Check the original source for details." |
|
|
|
content = "Content unavailable. Check the original source for details." |
|
|
|
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip()) |
|
|
|
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip()) |
|
|
|
|
|
|
|
|
|
|
|
# Append image attribution to the content to ensure visibility |
|
|
|
|
|
|
|
if image_url and image_source: |
|
|
|
if image_url and image_source: |
|
|
|
attribution = f"Image Source: {image_source}" |
|
|
|
attribution = f"Image Source: {image_source}" |
|
|
|
if page_url and uploader: |
|
|
|
if page_url and uploader: |
|
|
|
@ -779,7 +767,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im |
|
|
|
} |
|
|
|
} |
|
|
|
author_id = author_id_map.get(author["username"], 5) |
|
|
|
author_id = author_id_map.get(author["username"], 5) |
|
|
|
|
|
|
|
|
|
|
|
# Handle image upload |
|
|
|
|
|
|
|
image_id = None |
|
|
|
image_id = None |
|
|
|
if image_url: |
|
|
|
if image_url: |
|
|
|
logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") |
|
|
|
logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") |
|
|
|
@ -828,11 +815,9 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im |
|
|
|
post_id = post_info["id"] |
|
|
|
post_id = post_info["id"] |
|
|
|
post_url = post_info["link"] |
|
|
|
post_url = post_info["link"] |
|
|
|
|
|
|
|
|
|
|
|
# Save to recent_posts.json |
|
|
|
|
|
|
|
timestamp = datetime.now(timezone.utc).isoformat() |
|
|
|
timestamp = datetime.now(timezone.utc).isoformat() |
|
|
|
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) |
|
|
|
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) |
|
|
|
|
|
|
|
|
|
|
|
# Post article tweet to X only if should_post_tweet is True |
|
|
|
|
|
|
|
if should_post_tweet: |
|
|
|
if should_post_tweet: |
|
|
|
try: |
|
|
|
try: |
|
|
|
post = {"title": post_data["title"], "url": post_url} |
|
|
|
post = {"title": post_data["title"], "url": post_url} |
|
|
|
|