|
|
|
|
@ -407,7 +407,7 @@ def extract_main_topic(text): |
|
|
|
|
# Fallback to a generic term if no specific food item is found |
|
|
|
|
return "food trends" |
|
|
|
|
|
|
|
|
|
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None): |
|
|
|
|
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, page_url=None): |
|
|
|
|
try: |
|
|
|
|
safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50] |
|
|
|
|
headers = { |
|
|
|
|
@ -450,7 +450,13 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw |
|
|
|
|
response.raise_for_status() |
|
|
|
|
|
|
|
|
|
image_id = response.json()["id"] |
|
|
|
|
caption = f'<a href="{pixabay_url}">{image_source}</a> by {uploader}' if pixabay_url and uploader else image_source |
|
|
|
|
# Always include a clickable link and uploader if available |
|
|
|
|
if page_url and uploader: |
|
|
|
|
caption = f'<a href="{page_url}">{image_source}</a> by {uploader}' |
|
|
|
|
elif page_url: |
|
|
|
|
caption = f'<a href="{page_url}">{image_source}</a>' |
|
|
|
|
else: |
|
|
|
|
caption = image_source |
|
|
|
|
requests.post( |
|
|
|
|
f"{wp_base_url}/media/{image_id}", |
|
|
|
|
headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"}, |
|
|
|
|
@ -584,78 +590,60 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
|
try: |
|
|
|
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}") |
|
|
|
|
|
|
|
|
|
prompt = ( |
|
|
|
|
"Take this summary and insert a single HTML link into one paragraph (randomly chosen). " |
|
|
|
|
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text naturally, " |
|
|
|
|
"e.g., 'According to {source_name}, ' or '{source_name} shares that '. " |
|
|
|
|
"Place the link at the end of a sentence (after a period). " |
|
|
|
|
"Preserve the original paragraph structure, maintaining all newlines exactly as they are (each paragraph separated by a single \\n). " |
|
|
|
|
"Return the modified summary with exactly one link.\n\n" |
|
|
|
|
"Summary:\n{summary}\n\n" |
|
|
|
|
"Source Name: {source_name}\nSource URL: {source_url}" |
|
|
|
|
).format(summary=summary, source_name=source_name, source_url=source_url) |
|
|
|
|
|
|
|
|
|
# Add retry mechanism |
|
|
|
|
for attempt in range(3): |
|
|
|
|
try: |
|
|
|
|
response = client.chat.completions.create( |
|
|
|
|
model=LIGHT_TASK_MODEL, |
|
|
|
|
messages=[ |
|
|
|
|
{"role": "system", "content": prompt}, |
|
|
|
|
{"role": "user", "content": "Insert the link naturally into the summary."} |
|
|
|
|
], |
|
|
|
|
max_tokens=1000, |
|
|
|
|
temperature=0.7 |
|
|
|
|
) |
|
|
|
|
new_summary = response.choices[0].message.content.strip() |
|
|
|
|
# Split the summary into paragraphs |
|
|
|
|
paragraphs = summary.split('\n') |
|
|
|
|
if not paragraphs or all(not p.strip() for p in paragraphs): |
|
|
|
|
logging.error("No valid paragraphs to insert link.") |
|
|
|
|
return summary |
|
|
|
|
|
|
|
|
|
# Choose a paragraph with at least two sentences |
|
|
|
|
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] |
|
|
|
|
if not eligible_paragraphs: |
|
|
|
|
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") |
|
|
|
|
target_para = paragraphs[-1].strip() |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
if new_summary and new_summary.count(link_pattern) == 1: |
|
|
|
|
paragraphs = new_summary.split('\n') |
|
|
|
|
paragraphs = [p.strip() for p in paragraphs] |
|
|
|
|
new_para = f"{target_para} Source: {link_pattern}." |
|
|
|
|
paragraphs[-1] = new_para |
|
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}") |
|
|
|
|
logging.info(f"Appended link to summary: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
else: |
|
|
|
|
logging.warning(f"GPT attempt {attempt + 1}/3 failed to insert link correctly: {new_summary}") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Link insertion attempt {attempt + 1}/3 failed: {e}") |
|
|
|
|
|
|
|
|
|
logging.warning(f"GPT failed to insert link after 3 attempts. Using fallback.") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
|
# Select a random eligible paragraph |
|
|
|
|
target_para = random.choice(eligible_paragraphs) |
|
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) |
|
|
|
|
|
|
|
|
|
# Fallback path |
|
|
|
|
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' |
|
|
|
|
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) |
|
|
|
|
paragraphs = protected_summary.split('\n') |
|
|
|
|
if not paragraphs or all(not p.strip() for p in paragraphs): |
|
|
|
|
logging.error("No valid paragraphs to insert link.") |
|
|
|
|
return summary |
|
|
|
|
# Find a sentence to insert the link into (prefer mid-paragraph sentences) |
|
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] # Exclude the last sentence |
|
|
|
|
if not eligible_sentences: |
|
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] # Fall back to any sentence |
|
|
|
|
|
|
|
|
|
target_para = random.choice([p for p in paragraphs if p.strip()]) |
|
|
|
|
sentence_idx, sentence = random.choice(eligible_sentences) |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
phrases = [ |
|
|
|
|
f"According to {link_pattern}", |
|
|
|
|
f"{link_pattern} notes this insight", |
|
|
|
|
f"Details shared by {link_pattern}", |
|
|
|
|
f"Source: {link_pattern}" |
|
|
|
|
] |
|
|
|
|
insertion_phrase = random.choice(phrases) |
|
|
|
|
|
|
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', target_para) |
|
|
|
|
insertion_point = -1 |
|
|
|
|
for i, sent in enumerate(sentences): |
|
|
|
|
if sent.strip() and '@' not in sent: |
|
|
|
|
insertion_point = sum(len(s) + 1 for s in sentences[:i+1]) |
|
|
|
|
break |
|
|
|
|
if insertion_point == -1: |
|
|
|
|
insertion_point = len(target_para) |
|
|
|
|
# Split the sentence into words and insert the link naturally |
|
|
|
|
words = sentence.split() |
|
|
|
|
if len(words) < 3: # Ensure the sentence has enough words to split |
|
|
|
|
# If the sentence is too short, append the attribution |
|
|
|
|
new_sentence = f"{sentence} according to {link_pattern}." |
|
|
|
|
else: |
|
|
|
|
# Insert the link mid-sentence by splitting at a random point |
|
|
|
|
split_point = random.randint(1, len(words)-2) # Avoid splitting at the very start or end |
|
|
|
|
new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}" |
|
|
|
|
|
|
|
|
|
new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip() |
|
|
|
|
# Reconstruct the paragraph with the modified sentence |
|
|
|
|
sentences[sentence_idx] = new_sentence |
|
|
|
|
new_para = ' '.join(sentences) |
|
|
|
|
paragraphs[paragraphs.index(target_para)] = new_para |
|
|
|
|
|
|
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
logging.info(f"Summary with naturally embedded link: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
new_summary = new_summary.replace('@', '.') |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
|
# Fallback: append the link to the end of the summary |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
new_summary = f"{summary}\n\nSource: {link_pattern}." |
|
|
|
|
logging.info(f"Fallback summary with link: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
@ -725,7 +713,7 @@ def get_wp_tag_id(tag_name, wp_base_url, wp_username, wp_password): |
|
|
|
|
logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, pixabay_url=None, interest_score=4, post_id=None, should_post_tweet=True): |
|
|
|
|
def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, page_url=None, interest_score=4, post_id=None, should_post_tweet=True): |
|
|
|
|
wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2" |
|
|
|
|
logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}") |
|
|
|
|
|
|
|
|
|
@ -772,6 +760,15 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im |
|
|
|
|
content = "Content unavailable. Check the original source for details." |
|
|
|
|
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip()) |
|
|
|
|
|
|
|
|
|
# Append image attribution to the content to ensure visibility |
|
|
|
|
if image_url and image_source: |
|
|
|
|
attribution = f"Image Source: {image_source}" |
|
|
|
|
if page_url and uploader: |
|
|
|
|
attribution = f'Image Source: <a href="{page_url}">{image_source}</a> by {uploader}' |
|
|
|
|
elif page_url: |
|
|
|
|
attribution = f'Image Source: <a href="{page_url}">{image_source}</a>' |
|
|
|
|
formatted_content += f"\n<p>{attribution}</p>" |
|
|
|
|
|
|
|
|
|
author_id_map = { |
|
|
|
|
"owenjohnson": 10, |
|
|
|
|
"javiermorales": 2, |
|
|
|
|
@ -786,13 +783,13 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im |
|
|
|
|
image_id = None |
|
|
|
|
if image_url: |
|
|
|
|
logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") |
|
|
|
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) |
|
|
|
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url) |
|
|
|
|
if not image_id: |
|
|
|
|
logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay") |
|
|
|
|
pixabay_query = post_data["title"][:50] |
|
|
|
|
image_url, image_source, uploader, pixabay_url = get_image(pixabay_query) |
|
|
|
|
image_url, image_source, uploader, page_url = get_image(pixabay_query) |
|
|
|
|
if image_url: |
|
|
|
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) |
|
|
|
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url) |
|
|
|
|
if not image_id: |
|
|
|
|
logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") |
|
|
|
|
|
|
|
|
|
@ -1164,22 +1161,11 @@ def prepare_post_data(summary, title, main_topic=None): |
|
|
|
|
try: |
|
|
|
|
logging.info(f"Preparing post data for summary: {summary[:100]}...") |
|
|
|
|
|
|
|
|
|
prompt = ( |
|
|
|
|
"Generate a concise, engaging title (5-15 words) for this food-related article summary. " |
|
|
|
|
"The title should be catchy, avoid emojis, and not reproduce the original title verbatim. " |
|
|
|
|
"Return the title as plain text." |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
response = client.chat.completions.create( |
|
|
|
|
model=LIGHT_TASK_MODEL, |
|
|
|
|
messages=[ |
|
|
|
|
{"role": "system", "content": prompt}, |
|
|
|
|
{"role": "user", "content": summary} |
|
|
|
|
], |
|
|
|
|
max_tokens=50, |
|
|
|
|
temperature=0.7 |
|
|
|
|
) |
|
|
|
|
new_title = response.choices[0].message.content.strip() |
|
|
|
|
# Use the original generate_title_from_summary function to generate the title |
|
|
|
|
new_title = generate_title_from_summary(summary) |
|
|
|
|
if not new_title: |
|
|
|
|
logging.warning("Title generation failed, using fallback title") |
|
|
|
|
new_title = "A Tasty Food Discovery Awaits You" |
|
|
|
|
logging.info(f"Generated new title: '{new_title}'") |
|
|
|
|
|
|
|
|
|
# Update to unpack four values |
|
|
|
|
@ -1199,8 +1185,6 @@ def prepare_post_data(summary, title, main_topic=None): |
|
|
|
|
logging.warning("No image found for post, skipping") |
|
|
|
|
return None, None, None, None, None, None, None |
|
|
|
|
|
|
|
|
|
pixabay_url = page_url if image_source == "Pixabay" else None |
|
|
|
|
|
|
|
|
|
# Select a full author dictionary from AUTHORS (already imported from foodie_config) |
|
|
|
|
author = random.choice(AUTHORS) |
|
|
|
|
|
|
|
|
|
@ -1216,7 +1200,7 @@ def prepare_post_data(summary, title, main_topic=None): |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
logging.info(f"Post data prepared: Title: '{new_title}', Category: {category}, Author: {author['username']}") |
|
|
|
|
return post_data, author, category, image_url, image_source, uploader, pixabay_url |
|
|
|
|
return post_data, author, category, image_url, image_source, uploader, page_url |
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Failed to prepare post data: {e}") |
|
|
|
|
|