1 /*
2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 package com.sun.swingset3.demos.table;
25 
26 import java.io.BufferedReader;
27 import java.io.IOException;
28 import java.io.InputStreamReader;
29 import java.net.URL;
30 import java.net.URLConnection;
31 import java.net.URLEncoder;
32 import java.util.ArrayList;
33 
34 /**
35  * Class used to support converting a movie title string into an IMDB URI
36  * corresponding to that movie's IMDB entry.   Since IMDB encodes entries with
37  * an alpha-numeric key (rather than title), we have to use Yahoo search on the
38  * title and then screenscrape the search results to find the IMDB key.
39  *
40  * @author aim
41  */
public class IMDBLink {

    /** Host suffix that every scraped IMDB link must contain. */
    private static final String IMDB_HOST = ".imdb.com";

    /** Path prefixes (both capitalizations seen in search results) that precede an IMDB key. */
    private static final String[] TITLE_PATHS = {"/title", "/Title"};

    /**
     * Number of characters taken after the scrape key: the '/' separator plus
     * a 9-character IMDB key such as "tt0032138".
     */
    private static final int KEY_SEGMENT_LENGTH = 10;

    /** Closing tag that marks the line holding the IMDB page title. */
    private static final String TITLE_END_TAG = "</title>";

    private IMDBLink() {
        // static utility class; not instantiable
    }

    /**
     * Looks up the IMDB entry for a movie by running a Yahoo search on the
     * title and scraping IMDB title links out of the result page.  When more
     * than one distinct link is found, each candidate page is fetched and the
     * first one whose title year equals {@code year} or {@code year - 1} is
     * returned; if none verifies (or only one link was found) the first link
     * found is returned.
     *
     * @param movieTitle the title of the movie
     * @param year       the year the movie was nominated for the oscar
     * @return String containing URI for movie's IMDB entry or null if URI could not be found
     *         (including when the search URL could not be built from {@code movieTitle})
     * @throws IOException if the search page or a candidate IMDB page cannot be read
     */
    public static String getMovieURIString(String movieTitle, int year) throws IOException {
        URL url;

        // btw, google rejects the request with a 403 return code!
        // URL url = new URL("http://www.google.com/search?q=Dazed+and+confused");
        // Thank you, yahoo, for granting our search request :-)
        try {
            String urlKey = URLEncoder.encode(movieTitle, "UTF-8");
            url = new URL("http://search.yahoo.com/search?ei=utf-8&fr=sfp&p=imdb+" +
                    urlKey + "&iscqry=");
        } catch (Exception ex) {
            // Best effort: a title that cannot be turned into a query
            // (e.g. null) is reported and treated as "not found".
            System.err.println(ex);

            return null;
        }

        URLConnection conn = url.openConnection();
        conn.connect();

        // Parse the response from the Yahoo search query and find each
        // imdb/title result.  try-with-resources guarantees the stream is
        // closed even if reading fails part-way through.
        ArrayList<String> matches = new ArrayList<String>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                collectMatches(line, matches);
            }
        }

        // Since imdb contains entries for multiple movies of the same title,
        // use the year to find the right entry
        if (matches.size() > 1) {
            for (String matchURL : matches) {
                if (verifyYear(matchURL, year)) {
                    return matchURL;
                }
            }
        }
        return matches.isEmpty() ? null : matches.get(0);
    }

    /**
     * Appends to {@code matches} every distinct IMDB title URL found in
     * {@code line}, in order of discovery.
     *
     * @param line    one line of the search result page
     * @param matches accumulator of distinct URLs found so far
     */
    private static void collectMatches(String line, ArrayList<String> matches) {
        for (String titlePath : TITLE_PATHS) {
            String scrapeKey = IMDB_HOST + titlePath;
            int keyLength = scrapeKey.length();

            // A single (often very long) line may contain several links,
            // so walk over every occurrence rather than just the first.
            for (int index = line.indexOf(scrapeKey);
                    index != -1;
                    index = line.indexOf(scrapeKey, index + keyLength)) {

                // The IMDB key looks something like "tt0032138", so we take
                // the '/' and the 9 key characters after the scrape key
                // to construct the full IMDB URI.
                // e.g. http://www.imdb.com/title/tt0032138
                int end = index + keyLength + KEY_SEGMENT_LENGTH;
                if (end > line.length()) {
                    // Link is cut off by the end of the line: no usable key.
                    continue;
                }

                String imdbURL = "http://www" + line.substring(index, end);
                if (!matches.contains(imdbURL)) {
                    matches.add(imdbURL);
                }
            }
        }
    }

    /**
     * Fetches the page at {@code imdbURL} and checks the year embedded in
     * the first line that contains the closing title tag.
     *
     * @param imdbURL   URL of the candidate IMDB entry
     * @param movieYear the year the movie was nominated for the oscar
     * @return true if the title year equals {@code movieYear} or the year before it
     * @throws IOException if the page cannot be read
     */
    private static boolean verifyYear(String imdbURL, int movieYear) throws IOException {
        URLConnection conn = new URL(imdbURL).openConnection();
        conn.connect();

        // Get the response; the reader is closed on every exit path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int index = line.indexOf(TITLE_END_TAG);
                if (index != -1) {
                    // only interested in analyzing the one line
                    return titleYearMatches(line, index, movieYear);
                }
            }
        }
        return false;
    }

    /**
     * Checks a title line of the form {@code "<title>movie title (YEAR)</title>"}.
     *
     * @param line          the line containing the closing title tag
     * @param closeTagIndex index of the closing title tag within {@code line}
     * @param movieYear     the year the movie was nominated for the oscar
     * @return true if the four characters before the closing parenthesis parse
     *         to {@code movieYear} or {@code movieYear - 1}; false for any
     *         other title formatting
     */
    private static boolean titleYearMatches(String line, int closeTagIndex, int movieYear) {
        // Need room for "YEAR)" in front of the closing tag.
        if (closeTagIndex < 5) {
            return false;
        }
        try {
            int year = Integer.parseInt(line.substring(closeTagIndex - 5, closeTagIndex - 1));
            // Movie may have been made the year prior to oscar award
            return year == movieYear || year == movieYear - 1;
        } catch (NumberFormatException ex) {
            // ignore title lines that have other formatting
            return false;
        }
    }
}
146