1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 23 * Portions Copyright (c) 2020, Ric Harris <harrisric@users.noreply.github.com>. 24 */ 25 package org.opengrok.indexer.history; 26 27 import java.io.BufferedInputStream; 28 import java.io.ByteArrayInputStream; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.text.ParseException; 33 import java.util.ArrayList; 34 import java.util.HashSet; 35 import java.util.List; 36 import java.util.Set; 37 import java.util.logging.Level; 38 import java.util.logging.Logger; 39 40 import javax.xml.XMLConstants; 41 import javax.xml.parsers.SAXParser; 42 import javax.xml.parsers.SAXParserFactory; 43 44 import org.opengrok.indexer.configuration.RuntimeEnvironment; 45 import org.opengrok.indexer.logger.LoggerFactory; 46 import org.opengrok.indexer.util.Executor; 47 import org.xml.sax.Attributes; 48 import org.xml.sax.SAXException; 49 import org.xml.sax.ext.DefaultHandler2; 50 51 /** 52 * Parse source history for a Subversion Repository. 53 * 54 * @author Trond Norbye 55 */ 56 class SubversionHistoryParser implements Executor.StreamHandler { 57 58 private static final Logger LOGGER = LoggerFactory.getLogger(SubversionHistoryParser.class); 59 60 private SAXParser saxParser = null; 61 private Handler handler; 62 63 private static class Handler extends DefaultHandler2 { 64 65 /** 66 * Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds. 67 */ 68 static final int SVN_MILLIS_DATE_LENGTH = "2020-03-26T15:38:55.999Z".length(); 69 70 final String prefix; 71 final String home; 72 final int length; 73 final List<HistoryEntry> entries = new ArrayList<>(); 74 final Set<String> renamedFiles = new HashSet<>(); 75 final SubversionRepository repository; 76 HistoryEntry entry; 77 StringBuilder sb; 78 boolean isRenamed; 79 Handler(String home, String prefix, int length, SubversionRepository repository)80 Handler(String home, String prefix, int length, SubversionRepository repository) { 81 this.home = home; 82 this.prefix = prefix; 83 this.length = length; 84 this.repository = repository; 85 sb = new StringBuilder(); 86 } 87 getRenamedFiles()88 List<String> getRenamedFiles() { 89 return new ArrayList<>(renamedFiles); 90 } 91 92 @Override startElement(String uri, String localName, String qname, Attributes attr)93 public void startElement(String uri, String localName, String qname, Attributes attr) { 94 isRenamed = false; 95 if ("logentry".equals(qname)) { 96 entry = new HistoryEntry(); 97 entry.setActive(true); 98 entry.setRevision(attr.getValue("revision")); 99 } else if ("path".equals(qname)) { 100 isRenamed = attr.getIndex("copyfrom-path") != -1; 101 } 102 sb.setLength(0); 103 } 104 105 @Override endElement(String uri, String localName, String qname)106 public void endElement(String uri, String localName, String qname) throws SAXException { 107 String s = sb.toString(); 108 if ("author".equals(qname)) { 109 entry.setAuthor(s); 110 } else if ("date".equals(qname)) { 111 try { 112 // need to strip microseconds off - assume final character is Z otherwise invalid anyway. 113 String dateString = s; 114 if (s.length() > SVN_MILLIS_DATE_LENGTH) { 115 dateString = dateString.substring(0, SVN_MILLIS_DATE_LENGTH - 1) + 116 dateString.charAt(dateString.length() - 1); 117 } 118 entry.setDate(repository.parse(dateString)); 119 } catch (ParseException ex) { 120 throw new SAXException("Failed to parse date: " + s, ex); 121 } 122 } else if ("path".equals(qname)) { 123 /* 124 * We only want valid files in the repository, not the 125 * top-level directory itself, hence the check for inequality. 126 */ 127 if (s.startsWith(prefix) && !s.equals(prefix)) { 128 File file = new File(home, s.substring(prefix.length())); 129 String path = file.getAbsolutePath().substring(length); 130 // The same file names may be repeated in many commits, 131 // so intern them to reduce the memory footprint. 132 entry.addFile(path.intern()); 133 if (isRenamed) { 134 renamedFiles.add(file.getAbsolutePath().substring(home.length() + 1)); 135 } 136 } else { 137 LOGGER.log(Level.FINER, "Skipping file outside repository: " + s); 138 } 139 } else if ("msg".equals(qname)) { 140 entry.setMessage(s); 141 } 142 if ("logentry".equals(qname)) { 143 entries.add(entry); 144 } 145 sb.setLength(0); 146 } 147 148 @Override characters(char[] arg0, int arg1, int arg2)149 public void characters(char[] arg0, int arg1, int arg2) { 150 sb.append(arg0, arg1, arg2); 151 } 152 } 153 154 /** 155 * Initialize the SAX parser instance. 156 */ initSaxParser()157 private void initSaxParser() throws HistoryException { 158 SAXParserFactory factory = SAXParserFactory.newInstance(); 159 saxParser = null; 160 try { 161 saxParser = factory.newSAXParser(); 162 saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); // Compliant 163 saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); // compliant 164 } catch (Exception ex) { 165 throw new HistoryException("Failed to create SAX parser", ex); 166 } 167 } 168 169 /** 170 * Parse the history for the specified file. 171 * 172 * @param file the file to parse history for 173 * @param repos Pointer to the SubversionRepository 174 * @param sinceRevision the revision number immediately preceding the first 175 * revision we want, or {@code null} to fetch the entire history 176 * @return object representing the file's history 177 */ parse(File file, SubversionRepository repos, String sinceRevision, int numEntries, boolean interactive)178 History parse(File file, SubversionRepository repos, String sinceRevision, 179 int numEntries, boolean interactive) 180 throws HistoryException { 181 182 initSaxParser(); 183 handler = new Handler(repos.getDirectoryName(), repos.reposPath, 184 RuntimeEnvironment.getInstance().getSourceRootPath().length(), 185 repos); 186 187 Executor executor; 188 try { 189 executor = repos.getHistoryLogExecutor(file, sinceRevision, 190 numEntries, interactive); 191 } catch (IOException e) { 192 throw new HistoryException("Failed to get history for: \"" + 193 file.getAbsolutePath() + "\"", e); 194 } 195 196 int status = executor.exec(true, this); 197 if (status != 0) { 198 throw new HistoryException("Failed to get history for: \"" + 199 file.getAbsolutePath() + "\" Exit code: " + status); 200 } 201 202 List<HistoryEntry> entries = handler.entries; 203 204 // If we only fetch parts of the history, we're not interested in 205 // sinceRevision. Remove it. 206 if (sinceRevision != null) { 207 repos.removeAndVerifyOldestChangeset(entries, sinceRevision); 208 } 209 210 return new History(entries, handler.getRenamedFiles()); 211 } 212 213 /** 214 * Process the output from the log command and insert the HistoryEntries 215 * into the history field. 216 * 217 * @param input The output from the process 218 */ 219 @Override processStream(InputStream input)220 public void processStream(InputStream input) throws IOException { 221 try { 222 initSaxParser(); 223 saxParser.parse(new BufferedInputStream(input), handler); 224 } catch (Exception e) { 225 throw new IOException("An error occurred while parsing the xml output", e); 226 } 227 } 228 229 /** 230 * Parse the given string. 231 * 232 * @param buffer The string to be parsed 233 * @return The parsed history 234 * @throws IOException if we fail to parse the buffer 235 */ parse(String buffer)236 History parse(String buffer) throws IOException { 237 handler = new Handler("/", "", 0, new SubversionRepository()); 238 processStream(new ByteArrayInputStream(buffer.getBytes("UTF-8"))); 239 return new History(handler.entries, handler.getRenamedFiles()); 240 } 241 } 242