1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.history;
25 
26 import java.io.ByteArrayInputStream;
27 import java.io.ByteArrayOutputStream;
28 import java.io.File;
29 import java.io.FileOutputStream;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.text.DateFormat;
33 import java.text.FieldPosition;
34 import java.text.ParseException;
35 import java.text.ParsePosition;
36 import java.text.SimpleDateFormat;
37 import java.util.ArrayList;
38 import java.util.Date;
39 import java.util.Iterator;
40 import java.util.List;
41 import java.util.Locale;
42 import java.util.TreeSet;
43 import java.util.logging.Level;
44 import java.util.logging.Logger;
45 import org.opengrok.indexer.configuration.RuntimeEnvironment;
46 import org.opengrok.indexer.logger.LoggerFactory;
47 import org.opengrok.indexer.util.BufferSink;
48 import org.opengrok.indexer.util.Executor;
49 
/**
 * Abstract base class for an external repository.
 *
 * @author Trond Norbye
 */
55 public abstract class Repository extends RepositoryInfo {
56 
57     private static final long serialVersionUID = -203179700904894217L;
58 
59     private static final Logger LOGGER = LoggerFactory.getLogger(Repository.class);
60 
61     /**
62      * format used for printing the date in {@code currentVersion}.
63      * <p>
64      * NOTE: SimpleDateFormat is not thread-safe, lock must be held when formatting
65      */
66     protected static final SimpleDateFormat OUTPUT_DATE_FORMAT =
67             new SimpleDateFormat("yyyy-MM-dd HH:mm Z");
68 
69     /**
70      * The command with which to access the external repository. Can be
71      * {@code null} if the repository isn't accessed via a CLI, or if it hasn't
72      * been initialized by {@link #ensureCommand} yet.
73      */
74     protected String RepoCommand;
75 
76     protected final List<String> ignoredFiles;
77 
78     protected final List<String> ignoredDirs;
79 
80     /**
81      * List of &lt;revision, tags&gt; pairs for repositories which display tags
82      * only for files changed by the tagged commit.
83      */
84     protected TreeSet<TagEntry> tagList = null;
85 
fileHasHistory(File file)86     abstract boolean fileHasHistory(File file);
87 
88     /**
89      * Check if the repository supports {@code getHistory()} requests for whole
90      * directories at once.
91      *
92      * @return {@code true} if the repository can get history for directories
93      */
hasHistoryForDirectories()94     abstract boolean hasHistoryForDirectories();
95 
96     /**
97      * Get the history log for the specified file or directory.
98      *
99      * @param file the file to get the history for
100      * @return history log for file
101      * @throws HistoryException on error accessing the history
102      */
getHistory(File file)103     abstract History getHistory(File file) throws HistoryException;
104 
Repository()105     public Repository() {
106         super();
107         ignoredFiles = new ArrayList<>();
108         ignoredDirs = new ArrayList<>();
109     }
110 
111     /**
112      * Gets the instance's repository command, primarily for testing purposes.
113      * @return null if not {@link isWorking}, or otherwise a defined command
114      */
getRepoCommand()115     public String getRepoCommand() {
116         isWorking();
117         return RepoCommand;
118     }
119 
120     /**
121      * <p>
122      * Get the history after a specified revision.
123      * </p>
124      *
125      * <p>
126      * The default implementation first fetches the full history and then throws
127      * away the oldest revisions. This is not efficient, so subclasses should
128      * override it in order to get good performance. Once every subclass has
129      * implemented a more efficient method, the default implementation should be
130      * removed and made abstract.
131      * </p>
132      *
133      * @param file the file to get the history for
134      * @param sinceRevision the revision right before the first one to return,
135      * or {@code null} to return the full history
136      * @return partial history for file
137      * @throws HistoryException on error accessing the history
138      */
getHistory(File file, String sinceRevision)139     History getHistory(File file, String sinceRevision)
140             throws HistoryException {
141 
142         // If we want an incremental history update and get here, warn that
143         // it may be slow.
144         if (sinceRevision != null) {
145             LOGGER.log(Level.WARNING,
146                     "Incremental history retrieval is not implemented for {0}.",
147                     getClass().getSimpleName());
148             LOGGER.log(Level.WARNING,
149                     "Falling back to slower full history retrieval.");
150         }
151 
152         History history = getHistory(file);
153 
154         if (sinceRevision == null) {
155             return history;
156         }
157 
158         List<HistoryEntry> partial = new ArrayList<>();
159         for (HistoryEntry entry : history.getHistoryEntries()) {
160             partial.add(entry);
161             if (sinceRevision.equals(entry.getRevision())) {
162                 // Found revision right before the first one to return.
163                 break;
164             }
165         }
166 
167         removeAndVerifyOldestChangeset(partial, sinceRevision);
168         history.setHistoryEntries(partial);
169         return history;
170     }
171 
172     /**
173      * Remove the oldest changeset from a list (assuming sorted with most recent
174      * changeset first) and verify that it is the changeset we expected to find
175      * there.
176      *
177      * @param entries a list of {@code HistoryEntry} objects
178      * @param revision the revision we expect the oldest entry to have
179      * @throws HistoryException if the oldest entry was not the one we expected
180      */
removeAndVerifyOldestChangeset(List<HistoryEntry> entries, String revision)181     void removeAndVerifyOldestChangeset(List<HistoryEntry> entries,
182             String revision)
183             throws HistoryException {
184         HistoryEntry entry
185                 = entries.isEmpty() ? null : entries.remove(entries.size() - 1);
186 
187         // TODO We should check more thoroughly that the changeset is the one
188         // we expected it to be, since some SCMs may change the revision
189         // numbers so that identical revision numbers does not always mean
190         // identical changesets. We could for example get the cached changeset
191         // and compare more fields, like author and date.
192         if (entry == null || !revision.equals(entry.getRevision())) {
193             throw new HistoryException("Cached revision '" + revision
194                     + "' not found in the repository "
195                     + getDirectoryName());
196         }
197     }
198 
199     /**
200      * Gets the contents of a specific version of a named file, and copies
201      * into the specified target.
202      *
203      * @param target a required target file which will be overwritten
204      * @param parent the name of the directory containing the file
205      * @param basename the name of the file to get
206      * @param rev the revision to get
207      * @return {@code true} if contents were found
208      * @throws java.io.IOException if an I/O error occurs
209      */
getHistoryGet( File target, String parent, String basename, String rev)210     public boolean getHistoryGet(
211             File target, String parent, String basename, String rev)
212             throws IOException {
213         try (FileOutputStream out = new FileOutputStream(target)) {
214             return getHistoryGet(out::write, parent, basename, rev);
215         }
216     }
217 
218     /**
219      * Gets an {@link InputStream} of the contents of a specific version of a
220      * named file.
221      * @param parent the name of the directory containing the file
222      * @param basename the name of the file to get
223      * @param rev the revision to get
224      * @return a defined instance if contents were found; or else {@code null}
225      */
getHistoryGet( String parent, String basename, String rev)226     public InputStream getHistoryGet(
227             String parent, String basename, String rev) {
228         ByteArrayOutputStream out = new ByteArrayOutputStream();
229         if (getHistoryGet(out::write, parent, basename, rev)) {
230             return new ByteArrayInputStream(out.toByteArray());
231         }
232         return null;
233     }
234 
235     /**
236      * Subclasses must override to get the contents of a specific version of a
237      * named file, and copy to the specified {@code sink}.
238      *
239      * @param sink a defined instance
240      * @param parent the name of the directory containing the file
241      * @param basename the name of the file to get
242      * @param rev the revision to get
243      * @return a value indicating if the get was successful.
244      */
getHistoryGet( BufferSink sink, String parent, String basename, String rev)245     abstract boolean getHistoryGet(
246             BufferSink sink, String parent, String basename, String rev);
247 
248     /**
249      * Checks whether this parser can annotate files.
250      *
251      * @param file file to check
252      * @return <code>true</code> if annotation is supported
253      */
fileHasAnnotation(File file)254     abstract boolean fileHasAnnotation(File file);
255 
256     /**
257      * Returns if this repository tags only files changed in last commit, i.e.
258      * if we need to prepare list of repository-wide tags prior to creation of
259      * file history entries.
260      *
261      * @return True if we need tag list creation prior to file parsing, false by
262      * default.
263      */
hasFileBasedTags()264     boolean hasFileBasedTags() {
265         return false;
266     }
267 
getTagList()268     TreeSet<TagEntry> getTagList() {
269         return this.tagList;
270     }
271 
272     /**
273      * Assign tags to changesets they represent The complete list of tags must
274      * be pre-built using {@code getTagList()}. Then this function squeeze all
275      * tags to changesets which actually exist in the history of given file.
276      * Must be implemented repository-specific.
277      *
278      * @see getTagList
279      * @param hist History we want to assign tags to.
280      */
assignTagsInHistory(History hist)281     void assignTagsInHistory(History hist) throws HistoryException {
282         if (hist == null) {
283             return;
284         }
285         if (this.getTagList() == null) {
286             throw new HistoryException("Tag list was not created before assigning tags to changesets!");
287         }
288         Iterator<TagEntry> it = this.getTagList().descendingIterator();
289         TagEntry lastTagEntry = null;
290         // Go through all commits of given file
291         for (HistoryEntry ent : hist.getHistoryEntries()) {
292             // Assign all tags created since the last revision
293             // Revision in this HistoryEntry must be already specified!
294             // TODO is there better way to do this? We need to "repeat"
295             // last element returned by call to next()
296             while (lastTagEntry != null || it.hasNext()) {
297                 if (lastTagEntry == null) {
298                     lastTagEntry = it.next();
299                 }
300                 if (lastTagEntry.compareTo(ent) >= 0) {
301                     if (ent.getTags() == null) {
302                         ent.setTags(lastTagEntry.getTags());
303                     } else {
304                         ent.setTags(ent.getTags() + ", " + lastTagEntry.getTags());
305                     }
306                 } else {
307                     break;
308                 }
309                 if (it.hasNext()) {
310                     lastTagEntry = it.next();
311                 } else {
312                     lastTagEntry = null;
313                 }
314             }
315         }
316     }
317 
318     /**
319      * Create internal list of all tags in this repository.
320      *
321      * @param directory directory of the repository
322      * @param interactive true if in interactive mode
323      */
buildTagList(File directory, boolean interactive)324     protected void buildTagList(File directory, boolean interactive) {
325         this.tagList = null;
326     }
327 
328     /**
329      * Annotate the specified revision of a file.
330      *
331      * @param file the file to annotate
332      * @param revision revision of the file. Either {@code null} or a non-empty
333      * string.
334      * @return an <code>Annotation</code> object
335      * @throws java.io.IOException if an error occurs
336      */
annotate(File file, String revision)337     abstract Annotation annotate(File file, String revision) throws IOException;
338 
339     /**
340      * Return revision for annotate view.
341      *
342      * @param history_revision full revision
343      * @return revision string suitable for matching into annotation
344      */
getRevisionForAnnotate(String history_revision)345     protected String getRevisionForAnnotate(String history_revision) {
346         return history_revision;
347     }
348 
349     /**
350      * Create a history log cache for all files in this repository.
351      * {@code getHistory()} is used to fetch the history for the entire
352      * repository. If {@code hasHistoryForDirectories()} returns {@code false},
353      * this method is a no-op.
354      *
355      * @param cache the cache instance in which to store the history log
356      * @param sinceRevision if non-null, incrementally update the cache with all
357      * revisions after the specified revision; otherwise, create the full
358      * history starting with the initial revision
359      *
360      * @throws HistoryException on error
361      */
createCache(HistoryCache cache, String sinceRevision)362     final void createCache(HistoryCache cache, String sinceRevision)
363             throws HistoryException {
364         if (!isWorking()) {
365             return;
366         }
367 
368         // If we don't have a directory parser, we can't create the cache
369         // this way. Just give up and return.
370         if (!hasHistoryForDirectories()) {
371             LOGGER.log(
372                     Level.INFO,
373                     "Skipping creation of history cache for {0}, since retrieval "
374                             + "of history for directories is not implemented for this "
375                             + "repository type.", getDirectoryName());
376             return;
377         }
378 
379         File directory = new File(getDirectoryName());
380 
381         History history;
382         try {
383             history = getHistory(directory, sinceRevision);
384         } catch (HistoryException he) {
385             if (sinceRevision == null) {
386                 // Failed to get full history, so fail.
387                 throw he;
388             }
389             // Failed to get partial history. This may have been caused
390             // by changes in the revision numbers since the last update
391             // (bug #14724) so we'll try to regenerate the cache from
392             // scratch instead.
393             LOGGER.log(Level.WARNING,
394                     "Failed to get partial history. Attempting to "
395                     + "recreate the history cache from scratch.", he);
396             history = null;
397         }
398 
399         if (sinceRevision != null && history == null) {
400             // Failed to get partial history, now get full history instead.
401             history = getHistory(directory);
402             // Got full history successfully. Clear the history cache so that
403             // we can recreate it from scratch.
404             cache.clear(this);
405         }
406 
407         // We need to refresh list of tags for incremental reindex.
408         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
409         if (env.isTagsEnabled() && this.hasFileBasedTags()) {
410             this.buildTagList(new File(this.getDirectoryName()), false);
411         }
412 
413         if (history != null) {
414             cache.store(history, this);
415         }
416     }
417 
418     /**
419      * Check if this it the right repository type for the given file.
420      *
421      * @param file File to check if this is a repository for.
422      * @param interactive is this run from interactive mode
423      * @return true if this is the correct repository for this file/directory.
424      */
isRepositoryFor(File file, boolean interactive)425     abstract boolean isRepositoryFor(File file, boolean interactive);
426 
isRepositoryFor(File file)427     public final boolean isRepositoryFor(File file) {
428         return isRepositoryFor(file, false);
429     }
430 
431     /**
432      * Determine parent of this repository.
433      */
determineParent(boolean interactive)434     abstract String determineParent(boolean interactive) throws IOException;
435 
436     /**
437      * Determine parent of this repository.
438      * @return parent
439      * @throws java.io.IOException I/O exception
440      */
determineParent()441     public final String determineParent() throws IOException {
442         return determineParent(false);
443     }
444 
445     /**
446      * Determine branch of this repository.
447      */
determineBranch(boolean interactive)448     abstract String determineBranch(boolean interactive) throws IOException;
449 
450     /**
451      * Determine branch of this repository.
452      * @return branch
453      * @throws java.io.IOException I/O exception
454      */
determineBranch()455     public final String determineBranch() throws IOException {
456         return determineBranch(false);
457     }
458 
459     /**
460      * Get list of ignored files for this repository.
461      * @return list of strings
462      */
getIgnoredFiles()463     public List<String> getIgnoredFiles() {
464         return ignoredFiles;
465     }
466 
467     /**
468      * Get list of ignored directories for this repository.
469      * @return list of strings
470      */
getIgnoredDirs()471     public List<String> getIgnoredDirs() {
472         return ignoredDirs;
473     }
474 
475     /**
476      * Determine and return the current version of the repository.
477      *
478      * This operation is consider "heavy" so this function should not be
479      * called on every web request.
480      *
481      * @param interactive true if interactive mode
482      * @return the version
483      * @throws IOException if I/O exception occurred
484      */
determineCurrentVersion(boolean interactive)485     abstract String determineCurrentVersion(boolean interactive) throws IOException;
486 
determineCurrentVersion()487     public final String determineCurrentVersion() throws IOException {
488         return determineCurrentVersion(false);
489     }
490 
491     /**
492      * Returns true if this repository supports sub repositories (a.k.a.
493      * forests).
494      *
495      * @return true if this repository supports sub repositories
496      */
497     @SuppressWarnings("PMD.EmptyMethodInAbstractClassShouldBeAbstract")
supportsSubRepositories()498     boolean supportsSubRepositories() {
499         return false;
500     }
501 
502     /**
503      * Subclasses can override to get a value indicating that a repository implementation is nestable.
504      * @return {@code false}
505      */
isNestable()506     boolean isNestable() {
507         return false;
508     }
509 
getDateFormat()510     private DateFormat getDateFormat() {
511         return new RepositoryDateFormat();
512     }
513 
514     /**
515      * Format the given date according to the output format.
516      *
517      * @param date the date
518      * @return the string representing the formatted date
519      * @see #OUTPUT_DATE_FORMAT
520      */
format(Date date)521     public String format(Date date) {
522         synchronized (OUTPUT_DATE_FORMAT) {
523             return OUTPUT_DATE_FORMAT.format(date);
524         }
525     }
526 
527     /**
528      * Parse the given string as a date object with the repository date formats.
529      *
530      * @param dateString the string representing the date
531      * @return the instance of a date
532      * @throws ParseException when the string can not be parsed correctly
533      */
parse(String dateString)534     public Date parse(String dateString) throws ParseException {
535         final DateFormat format = getDateFormat();
536         synchronized (format) {
537             return format.parse(dateString);
538         }
539     }
540 
checkCmd(String... args)541     static Boolean checkCmd(String... args) {
542         Executor exec = new Executor(args);
543         return exec.exec(false) == 0;
544     }
545 
546     /**
547      * Set the name of the external client command that should be used to access
548      * the repository wrt. the given parameters. Does nothing, if this
549      * repository's <var>RepoCommand</var> has already been set (i.e. has a
550      * non-{@code null} value).
551      *
552      * @param propertyKey property key to lookup the corresponding system
553      * property.
554      * @param fallbackCommand the command to use, if lookup fails.
555      * @return the command to use.
556      * @see #RepoCommand
557      */
ensureCommand(String propertyKey, String fallbackCommand)558     protected String ensureCommand(String propertyKey, String fallbackCommand) {
559         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
560 
561         if (RepoCommand != null) {
562             return RepoCommand;
563         }
564 
565         RepoCommand = env.getRepoCmd(this.getClass().getCanonicalName());
566         if (RepoCommand == null) {
567             RepoCommand = System.getProperty(propertyKey, fallbackCommand);
568             env.setRepoCmd(this.getClass().getCanonicalName(), RepoCommand);
569         }
570 
571         return RepoCommand;
572     }
573 
getRepoRelativePath(final File file)574     protected String getRepoRelativePath(final File file)
575             throws IOException {
576 
577         String filename = file.getPath();
578         String repoDirName = getDirectoryName();
579 
580         String abs = file.getCanonicalPath();
581         if (abs.startsWith(repoDirName)) {
582             if (abs.length() > repoDirName.length()) {
583                 filename = abs.substring(repoDirName.length() + 1);
584             }
585         } else {
586             abs = file.getAbsolutePath();
587             if (abs.startsWith(repoDirName) && abs.length() >
588                 repoDirName.length()) {
589                 filename = abs.substring(repoDirName.length() + 1);
590             }
591         }
592         return filename;
593     }
594 
595     /**
596      * Copies all bytes from {@code in} to the {@code sink}.
597      * @return the number of writes to {@code sink}
598      */
copyBytes(BufferSink sink, InputStream in)599     static int copyBytes(BufferSink sink, InputStream in) throws IOException {
600         byte[] buffer = new byte[8 * 1024];
601         int iterations = 0;
602         int len;
603         while ((len = in.read(buffer)) != -1) {
604             if (len > 0) {
605                 ++iterations;
606                 sink.write(buffer, 0, len);
607             }
608         }
609         return iterations;
610     }
611 
612     static class HistoryRevResult {
613         boolean success;
614         int iterations;
615     }
616 
617     private class RepositoryDateFormat extends DateFormat {
618         private static final long serialVersionUID = -6951382723884436414L;
619 
620         private final Locale locale = Locale.ENGLISH;
621         // NOTE: SimpleDateFormat is not thread-safe, lock must be held when used
622         private final SimpleDateFormat[] formatters = new SimpleDateFormat[datePatterns.length];
623 
624         {
625             // initialize date formatters
626             for (int i = 0; i < datePatterns.length; i++) {
627                 formatters[i] = new SimpleDateFormat(datePatterns[i], locale);
628                 /*
629                  * TODO: the following would be nice - but currently it
630                  * could break the compatibility with some repository dates
631                  */
632                 // formatters[i].setLenient(false);
633             }
634         }
635 
636         @Override
format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition)637         public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
638             throw new UnsupportedOperationException("not implemented");
639         }
640 
641         @Override
parse(String source)642         public Date parse(String source) throws ParseException {
643             ParseException head = null, tail = null;
644             for (SimpleDateFormat formatter : formatters) {
645                 try {
646                     return formatter.parse(source);
647                 } catch (ParseException ex1) {
648                     /*
649                      * Adding all exceptions together to get some info in
650                      * the logs.
651                      */
652                     ex1 = new ParseException(
653                             String.format("%s with format \"%s\" and locale \"%s\"",
654                                     ex1.getMessage(),
655                                     formatter.toPattern(),
656                                     locale),
657                             ex1.getErrorOffset()
658                     );
659                     if (head == null) {
660                         head = tail = ex1;
661                     } else {
662                         tail.initCause(ex1);
663                         tail = ex1;
664                     }
665                 }
666             }
667             throw head != null ? head : new ParseException(String.format("Unparseable date: \"%s\"", source), 0);
668         }
669 
670         @Override
parse(String source, ParsePosition pos)671         public Date parse(String source, ParsePosition pos) {
672             throw new UnsupportedOperationException("not implemented");
673         }
674     }
675 }
676