fix
This commit is contained in:
+11
-5
@@ -247,15 +247,21 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Remove the original title from the summary
|
||||
# Remove the original title from the summary while preserving paragraphs
|
||||
title_pattern = re.compile(
|
||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||
re.IGNORECASE
|
||||
)
|
||||
final_summary = title_pattern.sub('', final_summary).strip()
|
||||
# Clean up any extra spaces or newlines left after removal
|
||||
final_summary = re.sub(r'\s+', ' ', final_summary)
|
||||
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
|
||||
# Split into paragraphs, process each one, then rejoin
|
||||
paragraphs = final_summary.split('\n')
|
||||
cleaned_paragraphs = []
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
# Remove the title and normalize spaces within the paragraph
|
||||
cleaned_para = title_pattern.sub('', para).strip()
|
||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
||||
cleaned_paragraphs.append(cleaned_para)
|
||||
final_summary = '\n'.join(cleaned_paragraphs)
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||
|
||||
+24
-11
@@ -64,9 +64,10 @@ def save_json_file(filename, key, value):
|
||||
data.append(entry)
|
||||
# Special handling for used_images.json to save as a flat list
|
||||
if filename.endswith('used_images.json'):
|
||||
flat_data = [item["title"] for item in data if isinstance(item, dict) and "title" in item]
|
||||
# Update the used_images set directly to keep it in sync
|
||||
used_images.add(key)
|
||||
with open(filename, 'w') as f:
|
||||
json.dump(flat_data, f)
|
||||
json.dump(list(used_images), f)
|
||||
else:
|
||||
with open(filename, 'w') as f:
|
||||
for item in data:
|
||||
@@ -981,22 +982,34 @@ used_images = set()
|
||||
if os.path.exists(used_images_file):
|
||||
try:
|
||||
with open(used_images_file, 'r') as f:
|
||||
data = json.load(f)
|
||||
# Handle malformed format (list of lists)
|
||||
if isinstance(data, list) and data and isinstance(data[0], list):
|
||||
logging.warning(f"Fixing malformed used_images.json format: {data[:2]}...")
|
||||
content = f.read().strip()
|
||||
if not content:
|
||||
logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
|
||||
data = []
|
||||
else:
|
||||
data = json.loads(content)
|
||||
if not isinstance(data, list):
|
||||
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
|
||||
data = []
|
||||
else:
|
||||
# Handle malformed format (list of lists or invalid entries)
|
||||
flat_data = []
|
||||
for item in data:
|
||||
if isinstance(item, list):
|
||||
flat_data.extend(item)
|
||||
else:
|
||||
if isinstance(item, str) and item.startswith('https://'):
|
||||
flat_data.append(item)
|
||||
used_images.update(flat_data)
|
||||
elif isinstance(item, list):
|
||||
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
|
||||
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
|
||||
else:
|
||||
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
|
||||
data = flat_data
|
||||
used_images.update(data)
|
||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}")
|
||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
|
||||
used_images = set()
|
||||
with open(used_images_file, 'w') as f:
|
||||
json.dump([], f)
|
||||
|
||||
# Function to save used_images to file
|
||||
def save_used_images():
|
||||
|
||||
Reference in New Issue
Block a user