fix

9 months ago · b79397f309
parent 7a71843c7c
commit b79397f309
2 changed files with 39 additions and 20 deletions
--- a/foodie_automator_rss.py
+++ b/foodie_automator_rss.py
@@ -247,15 +247,21 @@ def curate_from_rss():
            attempts += 1
            continue

-        # Remove the original title from the summary
+        # Remove the original title from the summary while preserving paragraphs
        title_pattern = re.compile(
            r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
            re.IGNORECASE
        )
-        final_summary = title_pattern.sub('', final_summary).strip()
-        # Clean up any extra spaces or newlines left after removal
-        final_summary = re.sub(r'\s+', ' ', final_summary)
-        final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
+        # Split into paragraphs, process each one, then rejoin
+        paragraphs = final_summary.split('\n')
+        cleaned_paragraphs = []
+        for para in paragraphs:
+            if para.strip():
+                # Remove the title and normalize spaces within the paragraph
+                cleaned_para = title_pattern.sub('', para).strip()
+                cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
+                cleaned_paragraphs.append(cleaned_para)
+        final_summary = '\n'.join(cleaned_paragraphs)

        final_summary = insert_link_naturally(final_summary, source_name, link)
        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -64,9 +64,10 @@ def save_json_file(filename, key, value):
        data.append(entry)
        # Special handling for used_images.json to save as a flat list
        if filename.endswith('used_images.json'):
-            flat_data = [item["title"] for item in data if isinstance(item, dict) and "title" in item]
+            # Update the used_images set directly to keep it in sync
+            used_images.add(key)
            with open(filename, 'w') as f:
-                json.dump(flat_data, f)
+                json.dump(list(used_images), f)
        else:
            with open(filename, 'w') as f:
                for item in data:
@@ -981,22 +982,34 @@ used_images = set()
 if os.path.exists(used_images_file):
    try:
        with open(used_images_file, 'r') as f:
-            data = json.load(f)
-            # Handle malformed format (list of lists)
-            if isinstance(data, list) and data and isinstance(data[0], list):
-                logging.warning(f"Fixing malformed used_images.json format: {data[:2]}...")
+            content = f.read().strip()
+            if not content:
+                logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
+                data = []
+            else:
+                data = json.loads(content)
+                if not isinstance(data, list):
+                    logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
+                    data = []
+                else:
+                    # Handle malformed format (list of lists or invalid entries)
                    flat_data = []
                    for item in data:
-                    if isinstance(item, list):
-                        flat_data.extend(item)
-                    else:
+                        if isinstance(item, str) and item.startswith('https://'):
                            flat_data.append(item)
-                used_images.update(flat_data)
+                        elif isinstance(item, list):
+                            logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
+                            flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
                        else:
+                            logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
+                    data = flat_data
            used_images.update(data)
        logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
    except Exception as e:
-        logging.warning(f"Failed to load used images from {used_images_file}: {e}")
+        logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
+        used_images = set()
+        with open(used_images_file, 'w') as f:
+            json.dump([], f)

 # Function to save used_images to file
 def save_used_images():