@@ -14,7 +14,7 @@ import sys
1414import time
1515import urllib .parse
1616from html .parser import HTMLParser
17- from typing import Any , Dict , List , Optional , Tuple
17+ from typing import Any , Dict , List
1818
1919import feedparser
2020from typing_extensions import override
@@ -189,6 +189,11 @@ def elide_subject(subject: str) -> str:
189189 return subject
190190
191191
def get_entry_time(entry: Dict[str, Any]) -> tuple[float, bool]:
    """Return an entry's timestamp and whether the entry carried one.

    The parsed time tuple is read from ``published_parsed``, falling back to
    ``updated_parsed`` when the former is missing or empty. The raw
    ``published``/``updated`` fields are not used: ``calendar.timegm`` needs
    a time tuple, not a date string.

    Args:
        entry: A feed entry supporting ``.get()`` lookups.

    Returns:
        ``(seconds_since_epoch, True)`` when a parsed time is available,
        otherwise ``(float("-inf"), False)``.
    """
    # `or` (rather than a nested .get default) also falls back when the
    # first key is present but holds None.
    entry_time = entry.get("published_parsed") or entry.get("updated_parsed")
    if not entry_time:
        # -inf lets untimed entries compare below every real timestamp.
        return float("-inf"), False
    return calendar.timegm(entry_time), True
195+
196+
192197def send_zulip (entry : Any , feed_name : str ) -> Dict [str , Any ]:
193198 body : str = entry .summary
194199 if opts .unwrap :
@@ -239,17 +244,13 @@ for feed_url in feed_urls:
239244 new_hashes : List [str ] = []
240245 data = feedparser .parse (feed_url )
241246 feed_name : str = data .feed .title or feed_url
247+ # Safeguard to not process older entries in unordered feeds
248+ entry_threshold = time .time () - opts .earliest_entry_age * 60 * 60 * 24
242249
243250 for entry in data .entries :
244251 entry_hash = compute_entry_hash (entry )
245- # An entry has either been published or updated.
246- entry_time : Optional [Tuple [int , int ]] = entry .get (
247- "published_parsed" , entry .get ("updated_parsed" )
248- )
249- if (
250- entry_time is not None
251- and time .time () - calendar .timegm (entry_time ) > opts .earliest_entry_age * 60 * 60 * 24
252- ):
252+ entry_time , is_time_tagged = get_entry_time (entry )
253+ if (is_time_tagged and entry_time < entry_threshold ) or entry_hash in old_feed_hashes :
253254 # As a safeguard against misbehaving feeds, don't try to process
254255 # entries older than some threshold; also skip entries whose hash
254255 # is already in old_feed_hashes.
255256 continue
0 commit comments