1/*
2 * cite.ts
3 *
4 * Copyright (C) 2021 by RStudio, PBC
5 *
6 * Unless you have received this program directly from RStudio pursuant
7 * to the terms of a commercial license agreement with RStudio, then
8 * this program is licensed to you under the terms of version 3 of the
9 * GNU Affero General Public License. This program is distributed WITHOUT
10 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13 *
14 */
15
16import { CSLName, CSLDate, CSL } from './csl';
17import { InsertCiteProps, InsertCiteUI } from './ui-dialogs';
18import { urlForDOI } from './doi';
19
// Characters that are stripped from a citation key. The pattern is global so
// that `replace` removes every occurrence rather than only the first one.
export const kInvalidCiteKeyChars = /[\]\[\s@',\\\#}{~%&\$\^_]/g;
// Number of leading title characters used when a cite id is derived from a title
const kCiteIdLeadingLength = 8;

/**
 * Creates a citation id, derived from `baseId`, that does not collide with
 * any of the `existingIds`.
 *
 * The base id is first reduced to characters whose code unit fits in 8 bits
 * and then stripped of everything matched by `kInvalidCiteKeyChars`. If
 * nothing usable remains, the placeholder `cite` is used. On a collision a
 * lowercase letter is appended (`a` through `z`); once all 26 letters are
 * taken an additional `a` is added to the base and the cycle starts over
 * (yielding ids such as `Teague2012aaaf`).
 *
 * @param existingIds Ids that are already in use.
 * @param baseId The preferred id.
 * @returns A non-empty id that is not contained in `existingIds`.
 */
export function createUniqueCiteId(existingIds: string[], baseId: string): string {
  // Drop every UTF-16 code unit above 255, then every invalid cite key character
  const sanitizedBaseId = baseId.replace(/[^\u0000-\u00ff]/g, '').replace(kInvalidCiteKeyChars, '');

  // The placeholder has to be applied after *both* sanitizing steps, otherwise
  // a base id made up solely of invalid characters would yield an empty id.
  let safeBaseId = sanitizedBaseId.length > 0 ? sanitizedBaseId : 'cite';

  const takenIds = new Set(existingIds);
  const disambiguationStartCharacter = 97; // a

  let proposedId = safeBaseId;
  let count = 0;
  while (takenIds.has(proposedId)) {
    // Every postfix from a to z is taken for the current base: extend the
    // base with an 'a' and run through the alphabet again.
    if (count !== 0 && count % 26 === 0) {
      safeBaseId = safeBaseId + String.fromCharCode(disambiguationStartCharacter);
    }

    const postfix = String.fromCharCode(disambiguationStartCharacter + (count % 26));
    proposedId = safeBaseId + postfix;
    count++;
  }
  return proposedId;
}
66
// Suggests a bibliographic identifier for the source. The id is made up of a
// leading part (the first author's name or, failing that, the start of the
// title or the type), followed by the year of issue when one is available.
// The result is made unique against existingIds.
export function suggestCiteId(existingIds: string[], csl: CSL) {
  // Prefer the family name of the first author, then its literal name
  const firstAuthor = csl.author && csl.author.length > 0 ? csl.author[0] : undefined;
  let leading = firstAuthor ? firstAuthor.family || firstAuthor.literal || '' : '';

  // Without a usable author, fall back to the first characters of the
  // short title or the title, and finally to the type of the source
  if (leading.length === 0) {
    const shortTitle = csl['short-title'];
    if (shortTitle && shortTitle.length > 0) {
      leading = shortTitle.slice(0, kCiteIdLeadingLength);
    } else if (csl.title) {
      leading = csl.title.slice(0, kCiteIdLeadingLength);
    } else {
      leading = csl.type;
    }
  }

  // The year is the first element of the first date part; data sometimes
  // arrives with a null year, in which case no year is appended
  const dateParts = csl.issued ? csl.issued['date-parts'] : undefined;
  let year = '';
  if (dateParts && dateParts.length > 0) {
    const firstYear = dateParts[0][0];
    if (firstYear) {
      year = `${firstYear}`;
    }
  }

  // Deduplicate against the existing entries
  const baseId = `${leading.toLowerCase()}${year}` || 'untitled';
  return createUniqueCiteId(existingIds, baseId);
}
114
// A single labelled value of a citation, as produced by formatForPreview
export interface CiteField {
  // The label of the field (e.g. 'Title' or 'Authors')
  name: string;
  // The text of the field
  value: string;
}
119
// Returns a URL for the citation: its explicit URL when it has one, otherwise
// a URL derived from its DOI. Yields undefined when it has neither.
export function urlForCitation(csl: CSL): string | undefined {
  if (csl.URL) {
    return csl.URL;
  }
  return csl.DOI ? urlForDOI(csl.DOI) : undefined;
}
127
// Converts a CSL entry into the ordered list of name/value pairs of its
// preview: title, authors, issue date, publication, volume and page(s) come
// first, followed by every remaining string-valued field that is not filtered.
export function formatForPreview(csl: CSL): CiteField[] {
  const fields: CiteField[] = [];
  const addField = (name: string, value: string) => {
    fields.push({ name, value });
  };

  if (csl.title) {
    addField('Title', csl.title);
  }

  // The authors entry is always present, even when there are no authors
  addField('Authors', formatAuthors(csl.author, 255));

  if (csl.issued && isValidDate(csl.issued)) {
    addField('Issue Date', formatIssuedDate(csl.issued));
  }

  const publication = csl['container-title'];
  if (publication) {
    addField('Publication', publication);
  }

  if (csl.volume) {
    addField('Volume', csl.volume);
  }

  if (csl.page) {
    addField('Page(s)', csl.page);
  }

  // Append the remaining fields, keyed by their capitalized CSL name
  const untypedCsl = csl as { [key: string]: any };
  for (const key of Object.keys(csl)) {
    if (kFilteredFields.includes(key)) {
      continue;
    }
    // Complex (non-string) values are not displayed
    const fieldValue = untypedCsl[key];
    if (typeof fieldValue === 'string') {
      addField(key.charAt(0).toUpperCase() + key.slice(1), fieldValue);
    }
  }

  return fields;
}
168
// CSL keys that formatForPreview skips when it appends the remaining
// string-valued fields. They are either emitted explicitly under their own
// label (title, author, issued, container-title, volume, page) or not
// included in the preview at all (id, abstract, provider).
const kFilteredFields = [
  'id',
  'title',
  'author',
  'issued',
  'container-title',
  'volume',
  'page',
  'abstract',
  'provider',
];
180
// Data sometimes arrives with a null year. A date is considered valid when
// none of its date parts has null as its first element (the year, which is
// required). A date without any date parts is considered valid, too.
function isValidDate(date: CSLDate): boolean {
  const parts = date['date-parts'];
  if (!parts) {
    return true;
  }
  return !parts.some(part => part[0] === null);
}
192
// TODO: Needs to support localization of the templated strings
const kEtAl = 'et al.';

/**
 * Formats a list of names as a single author string, for example
 * `Smith, J, Jones, A, and Brown, B`.
 *
 * Authors without a usable name are skipped. When `maxLength` is given, a
 * first author that is too long on its own is cut off with an ellipsis, and
 * a list that does not fit is cut short and terminated with `et al.`.
 *
 * @param authors The names to format.
 * @param maxLength The maximum length of the result; unlimited when omitted or 0.
 * @returns The author string, or an empty string when there is no usable name.
 */
export function formatAuthors(authors?: CSLName[], maxLength?: number): string {
  // No author(s) specified
  if (!authors || authors.length === 0) {
    return '';
  }

  // Drop authors without a usable name, they would otherwise leave
  // dangling separators behind (e.g. 'Smith, J, , and Jones')
  const names = authors.map(author => formatAuthorName(author)).filter(name => name.length > 0);
  if (names.length === 0) {
    return '';
  }

  // The first author is handled up front rather than inside a reduce callback:
  // a reduce without an initial value never visits index 0, so the truncation
  // of an overly long first author would never run.
  const firstName = names[0];
  let formatted =
    maxLength && firstName.length > maxLength ? `${firstName.substring(0, maxLength - 1)}…` : firstName;

  for (let index = 1; index < names.length; index++) {
    // Ignore any additional authors once the limit is reached or the
    // list has already been cut short
    if ((maxLength && formatted.length >= maxLength) || formatted.endsWith(kEtAl)) {
      break;
    }
    const separator = index === names.length - 1 ? ', and ' : ', ';
    formatted = addAuthorOrEtAl(formatted, `${formatted}${separator}${names[index]}`, maxLength);
  }
  return formatted;
}

// Formats a single name: the literal name when there is one, otherwise the
// family name followed by the first character of the given name (when there
// is one). Returns an empty string when there is neither.
function formatAuthorName(author: CSLName): string {
  if (author.literal?.length) {
    return author.literal;
  }
  if (author.family?.length) {
    return author.given?.length ? `${author.family}, ${author.given.substring(0, 1)}` : author.family;
  }
  return '';
}

// Returns newAuthorStr if it fits into maxLength (or if there is no limit),
// otherwise previousAuthorStr terminated by 'et al.'
function addAuthorOrEtAl(previousAuthorStr: string, newAuthorStr: string, maxLength?: number) {
  if (maxLength && newAuthorStr.length > maxLength) {
    return etAl(previousAuthorStr, maxLength);
  }
  return newAuthorStr;
}

// Appends 'et al.' to authorStr, shortening authorStr as far as required to
// stay within maxLength (a maxLength of 0 means unlimited). Note that the
// shortest possible result is '… et al.', which can still exceed a limit of
// fewer than 8 characters.
function etAl(authorStr: string, maxLength: number) {
  let shortened = authorStr;
  let candidate = `${shortened} ${kEtAl}`;
  if (!maxLength) {
    return candidate;
  }

  // First try to accommodate 'et al.' by dropping trailing words. lastIndexOf
  // yields -1 when there is no space (and 0 for a leading one), in either
  // case there is no further word that could be dropped.
  while (candidate.length > maxLength) {
    const lastSpace = shortened.lastIndexOf(' ');
    if (lastSpace <= 0) {
      break;
    }
    shortened = shortened.substring(0, lastSpace);
    candidate = `${shortened} ${kEtAl}`;
  }

  // As a last resort, truncate with an ellipsis. Besides the kept characters
  // the result holds the ellipsis, a space and 'et al.'.
  if (candidate.length > maxLength) {
    const keptLength = Math.max(0, maxLength - kEtAl.length - 2);
    return `${shortened.substring(0, keptLength)}… ${kEtAl}`;
  }
  return candidate;
}
264
// TODO: Needs to support localization of the templated strings
/**
 * Formats the year(s) of an issue date.
 *
 * A single date yields its year, a range of two dates yields `start-end`.
 * Years that are missing or null are left out (it is possible to receive
 * such entries, for example for 10.1163/1874-6772_seg_a44_588), so that a
 * range with only one known year yields just that year.
 *
 * @param date The issue date.
 * @returns The formatted year(s). An empty string if there is no date, no
 * known year, or if the date is malformed (it has neither one nor two parts).
 */
export function formatIssuedDate(date: CSLDate | undefined): string {
  const dateParts = date ? date['date-parts'] : undefined;

  // No issue date, or something that is neither a single date nor a range
  if (!dateParts || dateParts.length === 0 || dateParts.length > 2) {
    return '';
  }

  // Collect the year (the first element) of each part, skipping unknown ones
  const years: string[] = [];
  for (const part of dateParts) {
    const year = part ? part[0] : undefined;
    if (year) {
      years.push(`${year}`);
    }
  }
  return years.join('-');
}
294
// Computes the suggested id and the preview fields for the work that is
// about to be inserted as a citation.
export function citeUI(citeProps: InsertCiteProps): InsertCiteUI {
  const { csl, existingIds } = citeProps;

  // This should never happen - this function should always be called with a work
  if (!csl) {
    return { suggestedId: '', previewFields: [] };
  }

  return {
    suggestedId: suggestCiteId(existingIds, csl),
    previewFields: formatForPreview(csl),
  };
}
311