@@ -228,8 +228,6 @@ client: zulip.Client = zulip.Client(
     client="ZulipRSS/" + VERSION,
 )
 
-first_message = True
-
 for feed_url in feed_urls:
     feed_hashes_file = os.path.join(
         opts.data_dir, urllib.parse.urlparse(feed_url).netloc
@@ -241,7 +239,7 @@ for feed_url in feed_urls:
     except OSError:
         old_feed_hashes = {}
 
-    new_hashes: List[str] = []
+    new_entries: List[tuple[Any, str, float]] = []
     data = feedparser.parse(feed_url)
     feed_name: str = data.feed.title or feed_url
     # Safeguard to not process older entries in unordered feeds
@@ -251,32 +249,32 @@ for feed_url in feed_urls:
         entry_hash = compute_entry_hash(entry)
         entry_time, is_time_tagged = get_entry_time(entry)
         if (is_time_tagged and entry_time < entry_threshold) or entry_hash in old_feed_hashes:
-            # As a safeguard against misbehaving feeds, don't try to process
-            # entries older than some threshold.
             continue
-        if entry_hash in old_feed_hashes:
-            # We've already seen this. No need to process any older entries.
-            break
-        if not old_feed_hashes and len(new_hashes) >= opts.max_batch_size:
-            # On a first run, pick up the n (= opts.max_batch_size) most recent entries.
-            # An RSS feed has entries in reverse chronological order.
-            break
-
-        response: Dict[str, Any] = send_zulip(entry, feed_name)
-        if response["result"] != "success":
-            logger.error("Error processing %s", feed_url)
-            logger.error("%s", response)
-            if first_message:
-                # This is probably some fundamental problem like the stream not
-                # existing or something being misconfigured, so bail instead of
-                # getting the same error for every RSS entry.
-                log_error_and_exit("Failed to process first message")
-        # Go ahead and move on -- perhaps this entry is corrupt.
-        new_hashes.append(entry_hash)
-        first_message = False
+        new_entries.append((entry, entry_hash, entry_time))
 
-    with open(feed_hashes_file, "a") as f:
-        for hash in new_hashes:
-            f.write(hash + "\n")
+    # We process all entries to support unordered feeds,
+    # but post only the latest ones in chronological order.
+    sorted_entries = sorted(new_entries, key=lambda x: x[2])[-opts.max_batch_size:]
 
-    logger.info("Sent zulips for %d %s entries", len(new_hashes), feed_url)
+    with open(feed_hashes_file, "a") as f:
+        for entry_tuple in sorted_entries:
+            entry, entry_hash, _ = entry_tuple
+
+            response: Dict[str, Any] = send_zulip(entry, feed_name)
+            if response["result"] != "success":
+                logger.error("Error processing %s", feed_url)
+                logger.error("%s", response)
+                if not old_feed_hashes and entry_tuple == sorted_entries[0]:
+                    # This is probably some fundamental problem like the stream not
+                    # existing or something being misconfigured, so bail instead of
+                    # getting the same error for every RSS entry.
+                    log_error_and_exit("Failed to process first message")
+            # Go ahead and move on -- perhaps this entry is corrupt.
+            f.write(entry_hash + "\n")
+
+    logger.info(
+        "Processed %d entries from %s and sent %d zulips",
+        len(new_entries),
+        feed_url,
+        len(sorted_entries),
+    )
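
For reference, here is a minimal standalone sketch of the batching step this diff introduces: entries are collected in whatever order the feed delivers them, sorted ascending by timestamp, and only the newest `max_batch_size` are kept, so they get posted oldest-first. The sample tuples below are hypothetical stand-ins for parsed feed entries, and the bare `max_batch_size` stands in for `opts.max_batch_size`.

```python
from typing import Any, List, Tuple

max_batch_size = 3  # stands in for opts.max_batch_size

# (entry, entry_hash, entry_time) tuples as collected in the feed loop;
# the input order is arbitrary, as it would be for an unordered feed.
new_entries: List[Tuple[Any, str, float]] = [
    ("entry-d", "hash-d", 400.0),
    ("entry-a", "hash-a", 100.0),
    ("entry-c", "hash-c", 300.0),
    ("entry-b", "hash-b", 200.0),
]

# Sort ascending by timestamp, then slice the newest N off the end.
sorted_entries = sorted(new_entries, key=lambda x: x[2])[-max_batch_size:]

for entry, entry_hash, entry_time in sorted_entries:
    print(entry, entry_hash, entry_time)
# entry-b hash-b 200.0
# entry-c hash-c 300.0
# entry-d hash-d 400.0
```

The oldest entry ("entry-a") is dropped, which preserves the old behavior of capping a first run at the `max_batch_size` most recent entries while also handling feeds whose entries are not in reverse chronological order.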