1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23  * Portions Copyright (c) 2020, Ric Harris <harrisric@users.noreply.github.com>.
24  */
25 package org.opengrok.indexer.history;
26 
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.util.Executor;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DefaultHandler2;
50 
51 /**
52  * Parse source history for a Subversion Repository.
53  *
54  * @author Trond Norbye
55  */
56 class SubversionHistoryParser implements Executor.StreamHandler {
57 
58     private static final Logger LOGGER = LoggerFactory.getLogger(SubversionHistoryParser.class);
59 
60     private SAXParser saxParser = null;
61     private Handler handler;
62 
63     private static class Handler extends DefaultHandler2 {
64 
65         /**
66          * Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds.
67          */
68         static final int SVN_MILLIS_DATE_LENGTH = "2020-03-26T15:38:55.999Z".length();
69 
70         final String prefix;
71         final String home;
72         final int length;
73         final List<HistoryEntry> entries = new ArrayList<>();
74         final Set<String> renamedFiles = new HashSet<>();
75         final SubversionRepository repository;
76         HistoryEntry entry;
77         StringBuilder sb;
78         boolean isRenamed;
79 
Handler(String home, String prefix, int length, SubversionRepository repository)80         Handler(String home, String prefix, int length, SubversionRepository repository) {
81             this.home = home;
82             this.prefix = prefix;
83             this.length = length;
84             this.repository = repository;
85             sb = new StringBuilder();
86         }
87 
getRenamedFiles()88         List<String> getRenamedFiles() {
89             return new ArrayList<>(renamedFiles);
90         }
91 
92         @Override
startElement(String uri, String localName, String qname, Attributes attr)93         public void startElement(String uri, String localName, String qname, Attributes attr) {
94             isRenamed = false;
95             if ("logentry".equals(qname)) {
96                 entry = new HistoryEntry();
97                 entry.setActive(true);
98                 entry.setRevision(attr.getValue("revision"));
99             } else if ("path".equals(qname)) {
100                 isRenamed = attr.getIndex("copyfrom-path") != -1;
101             }
102             sb.setLength(0);
103         }
104 
105         @Override
endElement(String uri, String localName, String qname)106         public void endElement(String uri, String localName, String qname) throws SAXException {
107             String s = sb.toString();
108             if ("author".equals(qname)) {
109                 entry.setAuthor(s);
110             } else if ("date".equals(qname)) {
111                 try {
112                     // need to strip microseconds off - assume final character is Z otherwise invalid anyway.
113                     String dateString = s;
114                     if (s.length() > SVN_MILLIS_DATE_LENGTH) {
115                       dateString = dateString.substring(0, SVN_MILLIS_DATE_LENGTH - 1) +
116                           dateString.charAt(dateString.length() - 1);
117                     }
118                     entry.setDate(repository.parse(dateString));
119                 } catch (ParseException ex) {
120                     throw new SAXException("Failed to parse date: " + s, ex);
121                 }
122             } else if ("path".equals(qname)) {
123                 /*
124                  * We only want valid files in the repository, not the
125                  * top-level directory itself, hence the check for inequality.
126                  */
127                 if (s.startsWith(prefix) && !s.equals(prefix)) {
128                     File file = new File(home, s.substring(prefix.length()));
129                     String path = file.getAbsolutePath().substring(length);
130                     // The same file names may be repeated in many commits,
131                     // so intern them to reduce the memory footprint.
132                     entry.addFile(path.intern());
133                     if (isRenamed) {
134                         renamedFiles.add(file.getAbsolutePath().substring(home.length() + 1));
135                     }
136                 } else {
137                     LOGGER.log(Level.FINER, "Skipping file outside repository: " + s);
138                 }
139             } else if ("msg".equals(qname)) {
140                 entry.setMessage(s);
141             }
142             if ("logentry".equals(qname)) {
143                 entries.add(entry);
144             }
145             sb.setLength(0);
146         }
147 
148         @Override
characters(char[] arg0, int arg1, int arg2)149         public void characters(char[] arg0, int arg1, int arg2) {
150             sb.append(arg0, arg1, arg2);
151         }
152     }
153 
154     /**
155      * Initialize the SAX parser instance.
156      */
initSaxParser()157     private void initSaxParser() throws HistoryException {
158         SAXParserFactory factory = SAXParserFactory.newInstance();
159         saxParser = null;
160         try {
161             saxParser = factory.newSAXParser();
162             saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); // Compliant
163             saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); // compliant
164         } catch (Exception ex) {
165             throw new HistoryException("Failed to create SAX parser", ex);
166         }
167     }
168 
169     /**
170      * Parse the history for the specified file.
171      *
172      * @param file the file to parse history for
173      * @param repos Pointer to the SubversionRepository
174      * @param sinceRevision the revision number immediately preceding the first
175      * revision we want, or {@code null} to fetch the entire history
176      * @return object representing the file's history
177      */
parse(File file, SubversionRepository repos, String sinceRevision, int numEntries, boolean interactive)178     History parse(File file, SubversionRepository repos, String sinceRevision,
179             int numEntries, boolean interactive)
180             throws HistoryException {
181 
182         initSaxParser();
183         handler = new Handler(repos.getDirectoryName(), repos.reposPath,
184                 RuntimeEnvironment.getInstance().getSourceRootPath().length(),
185                 repos);
186 
187         Executor executor;
188         try {
189             executor = repos.getHistoryLogExecutor(file, sinceRevision,
190                     numEntries, interactive);
191         } catch (IOException e) {
192             throw new HistoryException("Failed to get history for: \"" +
193                     file.getAbsolutePath() + "\"", e);
194         }
195 
196         int status = executor.exec(true, this);
197         if (status != 0) {
198             throw new HistoryException("Failed to get history for: \"" +
199                     file.getAbsolutePath() + "\" Exit code: " + status);
200         }
201 
202         List<HistoryEntry> entries = handler.entries;
203 
204         // If we only fetch parts of the history, we're not interested in
205         // sinceRevision. Remove it.
206         if (sinceRevision != null) {
207             repos.removeAndVerifyOldestChangeset(entries, sinceRevision);
208         }
209 
210         return new History(entries, handler.getRenamedFiles());
211     }
212 
213    /**
214      * Process the output from the log command and insert the HistoryEntries
215      * into the history field.
216      *
217      * @param input The output from the process
218      */
219     @Override
processStream(InputStream input)220     public void processStream(InputStream input) throws IOException {
221         try {
222             initSaxParser();
223             saxParser.parse(new BufferedInputStream(input), handler);
224         } catch (Exception e) {
225             throw new IOException("An error occurred while parsing the xml output", e);
226         }
227     }
228 
229     /**
230      * Parse the given string.
231      *
232      * @param buffer The string to be parsed
233      * @return The parsed history
234      * @throws IOException if we fail to parse the buffer
235      */
parse(String buffer)236     History parse(String buffer) throws IOException {
237         handler = new Handler("/", "", 0, new SubversionRepository());
238         processStream(new ByteArrayInputStream(buffer.getBytes("UTF-8")));
239         return new History(handler.entries, handler.getRenamedFiles());
240     }
241 }
242