@@ -134,7 +134,24 @@ public class IndexDatabase {
134134
135135 private static final Logger LOGGER = LoggerFactory .getLogger (IndexDatabase .class );
136136
137- private static final Comparator <File > FILENAME_COMPARATOR = Comparator .comparing (File ::getName );
137+ @ VisibleForTesting
138+ static final Comparator <File > FILENAME_COMPARATOR = Comparator .comparing (File ::getName );
139+
140+ @ VisibleForTesting
141+ static final Comparator <Path > FILEPATH_COMPARATOR = (p1 , p2 ) -> {
142+ int nameCount = Math .min (p1 .getNameCount (), p2 .getNameCount ());
143+ int i ;
144+ for (i = 0 ; i < nameCount ; i ++) {
145+ var c1 = p1 .getName (i ).toString ();
146+ var c2 = p2 .getName (i ).toString ();
147+ if (c1 .equals (c2 )) {
148+ continue ;
149+ }
150+ return c1 .compareTo (c2 );
151+ }
152+
153+ return Integer .compare (p1 .getNameCount (), p2 .getNameCount ());
154+ };
138155
139156 private static final Set <String > CHECK_FIELDS ;
140157
@@ -197,6 +214,22 @@ public IndexDatabase() throws IOException {
197214 this (null );
198215 }
199216
/**
 * Test-only constructor that installs a caller-supplied uid iterator and index writer.
 * <p>
 * Anyone using this constructor is supposed to never call {@link #update()}.
 * Do not use for anything besides testing.
 * </p>
 * @param project project, handed to the delegate constructor
 * @param uidIter uid iterator
 * @param writer index writer
 * @throws IOException on error
 */
@VisibleForTesting
IndexDatabase(Project project, TermsEnum uidIter, IndexWriter writer) throws IOException {
    this(project, new IndexDownArgsFactory());
    // Install the collaborators first; initialize() is invoked only once they are in place.
    this.completer = new PendingFileCompleter();
    this.writer = writer;
    this.uidIter = uidIter;
    initialize();
}
232+
200233 /**
201234 * Create a new instance of an Index Database for a given project.
202235 *
@@ -709,8 +742,7 @@ public void update() throws IOException {
709742 if (stat == TermsEnum .SeekStatus .END ) {
710743 uidIter = null ;
711744 LOGGER .log (Level .WARNING ,
712- "Couldn''t find a start term for {0}, empty u field?" ,
713- startUid );
745+ "Couldn''t find a start term for {0}, empty u field?" , startUid );
714746 }
715747 }
716748
@@ -819,19 +851,25 @@ private void setupDeletedUids() throws IOException {
819851 Statistics stat = new Statistics ();
820852 LOGGER .log (Level .FINEST , "traversing the documents in {0} to collect uids of deleted documents" ,
821853 indexDirectory );
854+ StoredFields storedFields = reader .storedFields ();
822855 for (int i = 0 ; i < reader .maxDoc (); i ++) {
856+ Document doc = storedFields .document (i , LIVE_CHECK_FIELDS ); // use limited-field version
857+ IndexableField field = doc .getField (QueryBuilder .U );
823858 if (!liveDocs .get (i )) {
824- StoredFields storedFields = reader .storedFields ();
825- Document doc = storedFields .document (i , LIVE_CHECK_FIELDS ); // use limited-field version
826- IndexableField field = doc .getField (QueryBuilder .U );
827859 if (field != null ) {
828860 if (LOGGER .isLoggable (Level .FINEST )) {
829861 String uidString = field .stringValue ();
830- LOGGER .log (Level .FINEST , "adding ''{0}'' at {1} to deleted uid set" ,
831- new Object []{Util .uid2url (uidString ), Util .uid2date (uidString )});
862+ LOGGER .log (Level .FINEST , "adding ''{0}'' ({2}) at {1} to deleted uid set" ,
863+ new Object []{Util .uid2url (uidString ), Util .uid2date (uidString ), i });
832864 }
833865 deletedUids .add (field .stringValue ());
834866 }
867+ } else {
868+ if (field != null ) {
869+ String uidString = field .stringValue ();
870+ LOGGER .log (Level .FINEST , "live doc: ''{0}'' ({2}) at {1}" ,
871+ new Object []{Util .uid2url (uidString ), Util .uid2date (uidString ), i });
872+ }
835873 }
836874 }
837875 stat .report (LOGGER , Level .FINEST , String .format ("found %s deleted documents in %s" ,
@@ -931,12 +969,17 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
931969
932970 try (Progress progress = new Progress (LOGGER , String .format ("collecting files for %s" , project ),
933971 fileCollector .getFiles ().size ())) {
934- for (String path : fileCollector .getFiles ()) {
972+ List <Path > paths = fileCollector .getFiles ().stream ().
973+ map (Path ::of ).
974+ sorted (FILEPATH_COMPARATOR ).
975+ collect (Collectors .toList ());
976+ LOGGER .log (Level .FINEST , "collected sorted files: {0}" , paths );
977+ for (Path path : paths ) {
935978 if (isInterrupted ()) {
936979 return ;
937980 }
938- File file = new File (sourceRoot , path );
939- processFileHistoryBased (args , file , path );
981+ File file = new File (sourceRoot , path . toString () );
982+ processFileHistoryBased (args , file , path . toString () );
940983 progress .increment ();
941984 }
942985 }
@@ -1096,16 +1139,17 @@ private void removeAnnotationFile(String path) {
10961139 * and queue the removal of the associated xref file.
10971140 *
10981141 * @param removeHistory if false, do not remove history cache for this file
1142+ * @return deleted uid (as string)
10991143 * @throws java.io.IOException if an error occurs
11001144 */
1101- private void removeFile (boolean removeHistory ) throws IOException {
1145+ private String removeFile (boolean removeHistory ) throws IOException {
11021146 String path = Util .uid2url (uidIter .term ().utf8ToString ());
11031147
11041148 for (IndexChangedListener listener : listeners ) {
11051149 listener .fileRemove (path );
11061150 }
11071151
1108- removeFileDocUid (path );
1152+ String deletedUid = removeFileDocUid (path );
11091153
11101154 removeXrefFile (path );
11111155
@@ -1122,9 +1166,11 @@ private void removeFile(boolean removeHistory) throws IOException {
11221166 for (IndexChangedListener listener : listeners ) {
11231167 listener .fileRemoved (path );
11241168 }
1169+
1170+ return deletedUid ;
11251171 }
11261172
1127- private void removeFileDocUid (String path ) throws IOException {
1173+ private String removeFileDocUid (String path ) throws IOException {
11281174
11291175 // Determine if a reversal of counts is necessary, and execute if so.
11301176 if (isCountingDeltas ) {
@@ -1141,6 +1187,8 @@ private void removeFileDocUid(String path) throws IOException {
11411187 }
11421188
11431189 writer .deleteDocuments (new Term (QueryBuilder .U , uidIter .term ()));
1190+
1191+ return uidIter .term ().utf8ToString ();
11441192 }
11451193
11461194 private void decrementLOCforDoc (String path , Document doc ) {
@@ -1648,6 +1696,17 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
16481696 }
16491697 }
16501698
1699+ /**
1700+ * wrapper for fatal errors during indexing.
1701+ */
1702+ public static class IndexerFault extends RuntimeException {
1703+ private static final long serialVersionUID = -1 ;
1704+
1705+ public IndexerFault (String message ) {
1706+ super (message );
1707+ }
1708+ }
1709+
16511710 /**
16521711 * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
16531712 * represent files that have actually changed in some way, while the other method's argument represent
@@ -1660,12 +1719,14 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
16601719 @ VisibleForTesting
16611720 void processFileHistoryBased (IndexDownArgs args , File file , String path ) throws IOException {
16621721 final boolean fileExists = file .exists ();
1663-
1722+ final Set < String > deletedUidsHere = new HashSet <>();
16641723 path = Util .fixPathIfWindows (path );
1724+
16651725 // Traverse terms until reaching document beyond path of given file.
1666- while (uidIter != null && uidIter .term () != null
1667- && uidIter .term ().compareTo (emptyBR ) != 0
1668- && Util .uid2url (uidIter .term ().utf8ToString ()).compareTo (path ) <= 0 ) {
1726+ while (uidIter != null && uidIter .term () != null && uidIter .term ().compareTo (emptyBR ) != 0
1727+ && FILEPATH_COMPARATOR .compare (
1728+ Path .of (Util .uid2url (uidIter .term ().utf8ToString ())),
1729+ Path .of (path )) <= 0 ) {
16691730
16701731 if (deletedUids .contains (uidIter .term ().utf8ToString ())) {
16711732 logIgnoredUid (uidIter .term ().utf8ToString ());
@@ -1688,9 +1749,10 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
16881749 if (!matchOK ) {
16891750 removeFile (false );
16901751 addWorkHistoryBased (args , termFile , termPath );
1752+ deletedUidsHere .add (removeFile (false ));
16911753 }
16921754 } else {
1693- removeFile (!fileExists );
1755+ deletedUidsHere . add ( removeFile (!fileExists ) );
16941756 }
16951757
16961758 BytesRef next = uidIter .next ();
@@ -1703,6 +1765,18 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
17031765 // That said, it is necessary to check whether the file can be accepted. This is done in the function below.
17041766 // Also, allow for broken symbolic links (File.exists() returns false for these).
17051767 if (fileExists || Files .isSymbolicLink (file .toPath ())) {
1768+ // This assumes that the last modified time is indeed what the indexer uses when adding the document.
1769+ String time = DateTools .timeToString (file .lastModified (), DateTools .Resolution .MILLISECOND );
1770+ if (deletedUidsHere .contains (Util .path2uid (path , time ))) {
1771+ //
1772+ // Adding document with the same date of a pre-existing document which is being removed
1773+ // will lead to index corruption (duplicate documents). Hence, make the indexer to fail hard.
1774+ //
1775+ throw new IndexerFault (
1776+ String .format ("attempting to add file '%s' with date matching deleted document: %s" ,
1777+ path , time ));
1778+ }
1779+
17061780 addWorkHistoryBased (args , file , path );
17071781 }
17081782 }
0 commit comments