diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index aac82ca..49eb3f7 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -247,15 +247,21 @@ def curate_from_rss(): attempts += 1 continue - # Remove the original title from the summary + # Remove the original title from the summary while preserving paragraphs title_pattern = re.compile( r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title), re.IGNORECASE ) - final_summary = title_pattern.sub('', final_summary).strip() - # Clean up any extra spaces or newlines left after removal - final_summary = re.sub(r'\s+', ' ', final_summary) - final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip()) + # Split into paragraphs, process each one, then rejoin + paragraphs = final_summary.split('\n') + cleaned_paragraphs = [] + for para in paragraphs: + if para.strip(): + # Remove the title and normalize spaces within the paragraph + cleaned_para = title_pattern.sub('', para).strip() + cleaned_para = re.sub(r'\s+', ' ', cleaned_para) + cleaned_paragraphs.append(cleaned_para) + final_summary = '\n'.join(cleaned_paragraphs) final_summary = insert_link_naturally(final_summary, source_name, link) post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) diff --git a/foodie_utils.py b/foodie_utils.py index 52cf432..ff46f45 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -64,9 +64,10 @@ def save_json_file(filename, key, value): data.append(entry) # Special handling for used_images.json to save as a flat list if filename.endswith('used_images.json'): - flat_data = [item["title"] for item in data if isinstance(item, dict) and "title" in item] + # Update the used_images set directly to keep it in sync + used_images.add(key) with open(filename, 'w') as f: - json.dump(flat_data, f) + json.dump(list(used_images), f) else: with open(filename, 'w') as f: for item in data: @@ -981,22 +982,34 @@ used_images = set() if os.path.exists(used_images_file): try: with open(used_images_file, 'r') as f: - data = json.load(f) - # Handle malformed format (list of lists) - if isinstance(data, list) and data and isinstance(data[0], list): - logging.warning(f"Fixing malformed used_images.json format: {data[:2]}...") - flat_data = [] - for item in data: - if isinstance(item, list): - flat_data.extend(item) - else: - flat_data.append(item) - used_images.update(flat_data) + content = f.read().strip() + if not content: + logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.") + data = [] else: - used_images.update(data) + data = json.loads(content) + if not isinstance(data, list): + logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.") + data = [] + else: + # Handle malformed format (list of lists or invalid entries) + flat_data = [] + for item in data: + if isinstance(item, str) and item.startswith('https://'): + flat_data.append(item) + elif isinstance(item, list): + logging.warning(f"Fixing malformed entry in {used_images_file}: {item}") + flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')]) + else: + logging.warning(f"Skipping invalid entry in {used_images_file}: {item}") + data = flat_data + used_images.update(data) logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}") except Exception as e: - logging.warning(f"Failed to load used images from {used_images_file}: {e}") + logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.") + used_images = set() + with open(used_images_file, 'w') as f: + json.dump([], f) # Function to save used_images to file def save_used_images():