/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.swingset3.demos.table;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Class used to support converting a movie title string into an IMDB URI
 * corresponding to that movie's IMDB entry.  Since IMDB encodes entries with
 * an alpha-numeric key (rather than title), we have to use Yahoo search on the
 * title and then screenscrape the search results to find the IMDB key.
/**
 * Utility that maps a movie title to the URI of its IMDB entry.
 *
 * <p>IMDB addresses entries by an alpha-numeric key (for example
 * {@code tt0032138}) rather than by title, so the key is discovered by running
 * a Yahoo web search for the title and scraping the result page for links into
 * {@code imdb.com}. When the search yields several candidates, each candidate
 * page is fetched and the year in its {@code <title>} element is compared with
 * the requested year.
 *
 * <p>The class only has static members and cannot be instantiated.
 *
 * @author aim
 */
public class IMDBLink {

    /** Host suffix that precedes the title path in every scraped IMDB link. */
    private static final String IMDB_HOST = ".imdb.com";

    /** Title path prefixes that are scraped, in lookup order. */
    private static final String[] TITLE_PATHS = {"/title", "/Title"};

    /**
     * Number of characters taken after the title path to form the key segment,
     * e.g. {@code "/tt0032138"} (a slash followed by a 9 character key).
     */
    private static final int KEY_SEGMENT_LENGTH = 10;

    private IMDBLink() {
    }

    /**
     * Looks up the IMDB URI for a movie.
     *
     * @param movieTitle the title of the movie
     * @param year the year the movie was nominated for the oscar
     * @return String containing URI for movie's IMDB entry, or {@code null} if
     *         {@code movieTitle} is {@code null}, the search URL could not be
     *         built, or no IMDB link was found in the search results
     * @throws IOException if the search page or a candidate IMDB page cannot
     *         be fetched or read
     */
    public static String getMovieURIString(String movieTitle, int year) throws IOException {
        if (movieTitle == null) {
            return null;
        }

        // btw, google rejects the request with a 403 return code!
        // Thank you, yahoo, for granting our search request :-)
        URL searchUrl;
        try {
            String urlKey = URLEncoder.encode(movieTitle, "UTF-8");
            searchUrl = new URL("http://search.yahoo.com/search?ei=utf-8&fr=sfp&p=imdb+"
                    + urlKey + "&iscqry=");
        } catch (IOException ex) {
            // UnsupportedEncodingException or MalformedURLException: the query
            // cannot be expressed as a URL, so report it and give up quietly.
            System.err.println(ex);
            return null;
        }

        List<String> matches = findCandidates(searchUrl);

        // Since imdb contains entries for multiple movies of the same title,
        // use the year to find the right entry.
        if (matches.size() > 1) {
            for (String matchURL : matches) {
                if (verifyYear(matchURL, year)) {
                    return matchURL;
                }
            }
        }
        // Single candidate, or no candidate matched the year: fall back to the first hit.
        return matches.isEmpty() ? null : matches.get(0);
    }

    /**
     * Fetches the search result page and collects the distinct IMDB title URIs
     * it links to, in order of first appearance.
     */
    private static List<String> findCandidates(URL searchUrl) throws IOException {
        List<String> matches = new ArrayList<>();

        URLConnection conn = searchUrl.openConnection();
        conn.connect();

        // The query asks for utf-8 ("ei=utf-8"), so decode the response as UTF-8
        // instead of relying on the platform default charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String titlePath : TITLE_PATHS) {
                    collectLinks(line, IMDB_HOST + titlePath, matches);
                }
            }
        }
        return matches;
    }

    /**
     * Appends to {@code matches} every not-yet-seen IMDB URI that occurs in
     * {@code line} for the given scrape key (host suffix plus title path).
     */
    private static void collectLinks(String line, String scrapeKey, List<String> matches) {
        int keyLength = scrapeKey.length();
        int index = line.indexOf(scrapeKey);
        while (index != -1) {
            // The IMDB key looks something like "tt0032138", so the characters
            // following the scrape key complete the URI,
            // e.g. http://www.imdb.com/title/tt0032138
            int end = index + keyLength + KEY_SEGMENT_LENGTH;
            if (end > line.length()) {
                // Link is cut off by the end of the line; any later occurrence
                // would be cut off as well.
                break;
            }
            String imdbURL = "http://www" + line.substring(index, end);
            if (!matches.contains(imdbURL)) {
                matches.add(imdbURL);
            }
            index = line.indexOf(scrapeKey, index + keyLength);
        }
    }

    /**
     * Checks whether the page at {@code imdbURL} carries a title of the form
     * {@code <title>movie title (YEAR)</title>} whose year equals
     * {@code movieYear} or the year before it.
     *
     * @param imdbURL URI of the candidate IMDB page
     * @param movieYear the year the movie was nominated for the oscar
     * @return {@code true} if the year found in the title matches; {@code false}
     *         if it differs, cannot be parsed, or no closing title tag is found
     * @throws IOException if the page cannot be fetched or read
     */
    private static boolean verifyYear(String imdbURL, int movieYear) throws IOException {
        URLConnection conn = new URL(imdbURL).openConnection();
        conn.connect();

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int index = line.indexOf("</title>");
                if (index == -1) {
                    continue;
                }
                // Only the first line holding the closing tag is analyzed.
                // Expecting "...(YEAR)</title>": four digits and a ')' before the tag.
                if (index < 5) {
                    return false; // not enough room for "YEAR)" on this line
                }
                try {
                    int year = Integer.parseInt(line.substring(index - 5, index - 1));
                    // Movie may have been made the year prior to oscar award
                    return year == movieYear || year == movieYear - 1;
                } catch (NumberFormatException ignored) {
                    // title lines that have other formatting do not match
                    return false;
                }
            }
        }
        return false;
    }
}