fix image swap
This commit is contained in:
+57
-45
@@ -378,12 +378,15 @@ def smart_image_and_filter(title, summary):
|
|||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
||||||
|
|
||||||
if not image_query or len(image_query.split()) < 2:
|
if not image_query:
|
||||||
|
logging.warning(f"Image query is empty, using fallback")
|
||||||
|
return "food trends", ["cuisine", "dining"], skip_flag
|
||||||
|
# Allow single-word queries if they are specific (e.g., food items)
|
||||||
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"] # Add more as needed
|
||||||
|
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
||||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], skip_flag
|
return "food trends", ["cuisine", "dining"], skip_flag
|
||||||
|
|
||||||
return image_query, relevance_keywords, skip_flag
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
return "food trends", ["cuisine", "dining"], False
|
||||||
@@ -566,19 +569,19 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
|
"Take this summary and insert a single HTML link into one paragraph (randomly chosen). "
|
||||||
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
|
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text naturally, "
|
||||||
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} shares this insight.' "
|
"e.g., 'According to {source_name}, ' or '{source_name} shares that '. "
|
||||||
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
|
"Place the link at the end of a sentence (after a period). "
|
||||||
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
|
"Preserve the original paragraph structure, maintaining all newlines exactly as they are (each paragraph separated by a single \\n). "
|
||||||
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
|
|
||||||
"Each paragraph in the input summary is separated by a single \\n; ensure the output maintains this exact separation. "
|
|
||||||
"Do not add or remove newlines beyond the original summary structure. "
|
|
||||||
"Return the modified summary with exactly one link.\n\n"
|
"Return the modified summary with exactly one link.\n\n"
|
||||||
"Summary:\n{summary}\n\n"
|
"Summary:\n{summary}\n\n"
|
||||||
"Source Name: {source_name}\nSource URL: {source_url}"
|
"Source Name: {source_name}\nSource URL: {source_url}"
|
||||||
).format(summary=summary, source_name=source_name, source_url=source_url)
|
).format(summary=summary, source_name=source_name, source_url=source_url)
|
||||||
|
|
||||||
|
# Add retry mechanism
|
||||||
|
for attempt in range(3):
|
||||||
|
try:
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model=LIGHT_TASK_MODEL,
|
model=LIGHT_TASK_MODEL,
|
||||||
messages=[
|
messages=[
|
||||||
@@ -596,8 +599,12 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
new_summary = '\n'.join(paragraphs)
|
new_summary = '\n'.join(paragraphs)
|
||||||
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
|
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
|
||||||
return new_summary
|
return new_summary
|
||||||
|
else:
|
||||||
|
logging.warning(f"GPT attempt {attempt + 1}/3 failed to insert link correctly: {new_summary}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Link insertion attempt {attempt + 1}/3 failed: {e}")
|
||||||
|
|
||||||
logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.")
|
logging.warning(f"GPT failed to insert link after 3 attempts. Using fallback.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Link insertion failed: {e}")
|
logging.error(f"Link insertion failed: {e}")
|
||||||
|
|
||||||
@@ -612,10 +619,10 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
target_para = random.choice([p for p in paragraphs if p.strip()])
|
target_para = random.choice([p for p in paragraphs if p.strip()])
|
||||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
phrases = [
|
phrases = [
|
||||||
f"According to {link_pattern}", # Changed to a more neutral phrasing
|
f"According to {link_pattern}",
|
||||||
f"{link_pattern} notes this insight", # Adjusted phrasing
|
f"{link_pattern} notes this insight",
|
||||||
f"Details shared by {link_pattern}", # Adjusted phrasing
|
f"Details shared by {link_pattern}",
|
||||||
f"Source: {link_pattern}" # Simple attribution
|
f"Source: {link_pattern}"
|
||||||
]
|
]
|
||||||
insertion_phrase = random.choice(phrases)
|
insertion_phrase = random.choice(phrases)
|
||||||
|
|
||||||
@@ -864,42 +871,39 @@ used_images = set()
|
|||||||
# Load used images from file if it exists
|
# Load used images from file if it exists
|
||||||
if os.path.exists(used_images_file):
|
if os.path.exists(used_images_file):
|
||||||
try:
|
try:
|
||||||
with open(used_images_file, 'r') as f:
|
entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24) # Use load_json_file for consistency
|
||||||
content = f.read().strip()
|
for entry in entries:
|
||||||
if not content:
|
if isinstance(entry, dict) and "title" in entry and entry["title"].startswith('https://'):
|
||||||
logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
|
used_images.add(entry["title"])
|
||||||
data = []
|
|
||||||
else:
|
else:
|
||||||
data = json.loads(content)
|
logging.warning(f"Skipping invalid entry in {used_images_file}: {entry}")
|
||||||
if not isinstance(data, list):
|
|
||||||
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.")
|
|
||||||
if isinstance(data, dict):
|
|
||||||
# If it's a dict, try to extract URLs from values
|
|
||||||
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')]
|
|
||||||
else:
|
|
||||||
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.")
|
|
||||||
data = []
|
|
||||||
# Filter out non-string or non-URL entries
|
|
||||||
data = [item for item in data if isinstance(item, str) and item.startswith('https://')]
|
|
||||||
used_images.update(data)
|
|
||||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
||||||
used_images = set()
|
used_images = set()
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
json.dump([], f)
|
f.write("")
|
||||||
|
|
||||||
# Function to save used_images to file
|
# Function to save used_images to file
|
||||||
def save_used_images():
|
def save_used_images():
|
||||||
try:
|
try:
|
||||||
# Ensure used_images contains only valid URLs
|
# Load existing entries to preserve timestamps
|
||||||
valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')]
|
entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24)
|
||||||
if len(valid_urls) != len(used_images):
|
existing_entries = {entry["title"]: entry for entry in entries if isinstance(entry, dict) and "title" in entry}
|
||||||
logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set")
|
|
||||||
|
# Create new entries for used_images
|
||||||
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
|
updated_entries = []
|
||||||
|
for url in used_images:
|
||||||
|
if url in existing_entries:
|
||||||
|
updated_entries.append(existing_entries[url])
|
||||||
|
else:
|
||||||
|
updated_entries.append({"title": url, "timestamp": timestamp})
|
||||||
|
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
json.dump(valid_urls, f, indent=2)
|
for entry in updated_entries:
|
||||||
logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}")
|
f.write(json.dumps(entry) + '\n')
|
||||||
|
logging.info(f"Saved {len(updated_entries)} used image URLs to {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
||||||
|
|
||||||
@@ -938,7 +942,7 @@ def process_photo(photo, search_query):
|
|||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
|
|
||||||
used_images.add(img_url)
|
used_images.add(img_url)
|
||||||
save_used_images()
|
save_used_images() # This will now save in the correct format
|
||||||
|
|
||||||
flickr_data = {
|
flickr_data = {
|
||||||
"title": search_query,
|
"title": search_query,
|
||||||
@@ -1052,7 +1056,15 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
|
|
||||||
last_flickr_request_time = time.time()
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
# Step 1: Search Flickr directly with the original query
|
||||||
|
logging.info(f"Searching Flickr directly with query: '{search_query}'")
|
||||||
|
photos = search_flickr(search_query)
|
||||||
|
for photo in photos:
|
||||||
|
result = process_photo(photo, search_query)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Step 2: Search DDG to find Flickr photo IDs
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
photo_ids = search_ddg_for_flickr(search_query)
|
||||||
if photo_ids:
|
if photo_ids:
|
||||||
@@ -1063,7 +1075,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
|
# Step 3: Break down the query into keywords and classify them
|
||||||
keywords = search_query.lower().split()
|
keywords = search_query.lower().split()
|
||||||
if len(keywords) > 1:
|
if len(keywords) > 1:
|
||||||
classifications = classify_keywords(keywords)
|
classifications = classify_keywords(keywords)
|
||||||
@@ -1080,7 +1092,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 3: Final fallback using relevance keywords
|
# Step 4: Final fallback using relevance keywords
|
||||||
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
||||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
||||||
photos = search_flickr(fallback_query)
|
photos = search_flickr(fallback_query)
|
||||||
@@ -1155,7 +1167,7 @@ def prepare_post_data(final_summary, original_title, context_info=""):
|
|||||||
|
|
||||||
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
||||||
try:
|
try:
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
|
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24) # Added expiration_hours
|
||||||
entry = {
|
entry = {
|
||||||
"title": post_title,
|
"title": post_title,
|
||||||
"url": post_url,
|
"url": post_url,
|
||||||
|
|||||||
Reference in New Issue
Block a user