my-fix-branch
Shane 7 months ago
parent 7a71843c7c
commit b79397f309
  1. 16
      foodie_automator_rss.py
  2. 43
      foodie_utils.py

@ -247,15 +247,21 @@ def curate_from_rss():
attempts += 1 attempts += 1
continue continue
# Remove the original title from the summary # Remove the original title from the summary while preserving paragraphs
title_pattern = re.compile( title_pattern = re.compile(
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title), r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
re.IGNORECASE re.IGNORECASE
) )
final_summary = title_pattern.sub('', final_summary).strip() # Split into paragraphs, process each one, then rejoin
# Clean up any extra spaces or newlines left after removal paragraphs = final_summary.split('\n')
final_summary = re.sub(r'\s+', ' ', final_summary) cleaned_paragraphs = []
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip()) for para in paragraphs:
if para.strip():
# Remove the title and normalize spaces within the paragraph
cleaned_para = title_pattern.sub('', para).strip()
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
cleaned_paragraphs.append(cleaned_para)
final_summary = '\n'.join(cleaned_paragraphs)
final_summary = insert_link_naturally(final_summary, source_name, link) final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)

@ -64,9 +64,10 @@ def save_json_file(filename, key, value):
data.append(entry) data.append(entry)
# Special handling for used_images.json to save as a flat list # Special handling for used_images.json to save as a flat list
if filename.endswith('used_images.json'): if filename.endswith('used_images.json'):
flat_data = [item["title"] for item in data if isinstance(item, dict) and "title" in item] # Update the used_images set directly to keep it in sync
used_images.add(key)
with open(filename, 'w') as f: with open(filename, 'w') as f:
json.dump(flat_data, f) json.dump(list(used_images), f)
else: else:
with open(filename, 'w') as f: with open(filename, 'w') as f:
for item in data: for item in data:
@ -981,22 +982,34 @@ used_images = set()
if os.path.exists(used_images_file): if os.path.exists(used_images_file):
try: try:
with open(used_images_file, 'r') as f: with open(used_images_file, 'r') as f:
data = json.load(f) content = f.read().strip()
# Handle malformed format (list of lists) if not content:
if isinstance(data, list) and data and isinstance(data[0], list): logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
logging.warning(f"Fixing malformed used_images.json format: {data[:2]}...") data = []
flat_data = []
for item in data:
if isinstance(item, list):
flat_data.extend(item)
else:
flat_data.append(item)
used_images.update(flat_data)
else: else:
used_images.update(data) data = json.loads(content)
if not isinstance(data, list):
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
data = []
else:
# Handle malformed format (list of lists or invalid entries)
flat_data = []
for item in data:
if isinstance(item, str) and item.startswith('https://'):
flat_data.append(item)
elif isinstance(item, list):
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
else:
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
data = flat_data
used_images.update(data)
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}") logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
except Exception as e: except Exception as e:
logging.warning(f"Failed to load used images from {used_images_file}: {e}") logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
used_images = set()
with open(used_images_file, 'w') as f:
json.dump([], f)
# Function to save used_images to file # Function to save used_images to file
def save_used_images(): def save_used_images():

Loading…
Cancel
Save