fix
This commit is contained in:
+11
-5
@@ -247,15 +247,21 @@ def curate_from_rss():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Remove the original title from the summary
|
# Remove the original title from the summary while preserving paragraphs
|
||||||
title_pattern = re.compile(
|
title_pattern = re.compile(
|
||||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||||
re.IGNORECASE
|
re.IGNORECASE
|
||||||
)
|
)
|
||||||
final_summary = title_pattern.sub('', final_summary).strip()
|
# Split into paragraphs, process each one, then rejoin
|
||||||
# Clean up any extra spaces or newlines left after removal
|
paragraphs = final_summary.split('\n')
|
||||||
final_summary = re.sub(r'\s+', ' ', final_summary)
|
cleaned_paragraphs = []
|
||||||
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
|
for para in paragraphs:
|
||||||
|
if para.strip():
|
||||||
|
# Remove the title and normalize spaces within the paragraph
|
||||||
|
cleaned_para = title_pattern.sub('', para).strip()
|
||||||
|
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
||||||
|
cleaned_paragraphs.append(cleaned_para)
|
||||||
|
final_summary = '\n'.join(cleaned_paragraphs)
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||||
|
|||||||
+28
-15
@@ -64,9 +64,10 @@ def save_json_file(filename, key, value):
|
|||||||
data.append(entry)
|
data.append(entry)
|
||||||
# Special handling for used_images.json to save as a flat list
|
# Special handling for used_images.json to save as a flat list
|
||||||
if filename.endswith('used_images.json'):
|
if filename.endswith('used_images.json'):
|
||||||
flat_data = [item["title"] for item in data if isinstance(item, dict) and "title" in item]
|
# Update the used_images set directly to keep it in sync
|
||||||
|
used_images.add(key)
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
json.dump(flat_data, f)
|
json.dump(list(used_images), f)
|
||||||
else:
|
else:
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
for item in data:
|
for item in data:
|
||||||
@@ -981,22 +982,34 @@ used_images = set()
|
|||||||
if os.path.exists(used_images_file):
|
if os.path.exists(used_images_file):
|
||||||
try:
|
try:
|
||||||
with open(used_images_file, 'r') as f:
|
with open(used_images_file, 'r') as f:
|
||||||
data = json.load(f)
|
content = f.read().strip()
|
||||||
# Handle malformed format (list of lists)
|
if not content:
|
||||||
if isinstance(data, list) and data and isinstance(data[0], list):
|
logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
|
||||||
logging.warning(f"Fixing malformed used_images.json format: {data[:2]}...")
|
data = []
|
||||||
flat_data = []
|
|
||||||
for item in data:
|
|
||||||
if isinstance(item, list):
|
|
||||||
flat_data.extend(item)
|
|
||||||
else:
|
|
||||||
flat_data.append(item)
|
|
||||||
used_images.update(flat_data)
|
|
||||||
else:
|
else:
|
||||||
used_images.update(data)
|
data = json.loads(content)
|
||||||
|
if not isinstance(data, list):
|
||||||
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
|
||||||
|
data = []
|
||||||
|
else:
|
||||||
|
# Handle malformed format (list of lists or invalid entries)
|
||||||
|
flat_data = []
|
||||||
|
for item in data:
|
||||||
|
if isinstance(item, str) and item.startswith('https://'):
|
||||||
|
flat_data.append(item)
|
||||||
|
elif isinstance(item, list):
|
||||||
|
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
|
||||||
|
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
|
||||||
|
else:
|
||||||
|
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
|
||||||
|
data = flat_data
|
||||||
|
used_images.update(data)
|
||||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}")
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
|
||||||
|
used_images = set()
|
||||||
|
with open(used_images_file, 'w') as f:
|
||||||
|
json.dump([], f)
|
||||||
|
|
||||||
# Function to save used_images to file
|
# Function to save used_images to file
|
||||||
def save_used_images():
|
def save_used_images():
|
||||||
|
|||||||
Reference in New Issue
Block a user