/*
 * cite.ts
 *
 * Copyright (C) 2021 by RStudio, PBC
 *
 * Unless you have received this program directly from RStudio pursuant
 * to the terms of a commercial license agreement with RStudio, then
 * this program is licensed to you under the terms of version 3 of the
 * GNU Affero General Public License. This program is distributed WITHOUT
 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
 *
 */

import { CSLName, CSLDate, CSL } from './csl';
import { InsertCiteProps, InsertCiteUI } from './ui-dialogs';
import { urlForDOI } from './doi';

/** Characters that may not appear in a citation key; they are stripped from generated ids. */
export const kInvalidCiteKeyChars = /[\]\[\s@',\\\#}{~%&\$\^_]/g;

// Maximum number of title characters used as the leading part of a suggested id
const kCiteIdLeadingLength = 8;

/**
 * Creates a citation id derived from `baseId` that does not collide with `existingIds`.
 *
 * Characters with a code unit above 255 and characters matching
 * `kInvalidCiteKeyChars` are removed from `baseId`. If nothing usable remains,
 * the placeholder `cite` is used. On a collision a lowercase letter is appended
 * (`a` through `z`); each time the alphabet is exhausted an extra `a` is added
 * to the base and the cycle restarts (for example `Teague2012aaaf`).
 *
 * @param existingIds Ids that are already taken.
 * @param baseId The preferred id.
 * @returns An id that is not contained in `existingIds`.
 */
export function createUniqueCiteId(existingIds: string[], baseId: string): string {
  const placeholderId = 'cite';
  const takenIds = new Set(existingIds);

  // Keep only characters whose code unit fits in 8 bits
  let eightBitBaseId = '';
  for (let i = 0; i < baseId.length; i++) {
    const code = baseId.charCodeAt(i);
    if (code <= 255) {
      eightBitBaseId += String.fromCharCode(code);
    }
  }

  // If there are no characters left, just use a placeholder
  if (eightBitBaseId.length === 0) {
    eightBitBaseId = placeholderId;
  }

  // The base id with invalid characters removed. Stripping can itself leave
  // nothing behind (e.g. a base id of "_"), so fall back to the placeholder
  // again rather than returning an empty id.
  let safeBaseId = eightBitBaseId.replace(kInvalidCiteKeyChars, '');
  if (safeBaseId.length === 0) {
    safeBaseId = placeholderId;
  }

  // If there is a conflict with an existing id, append a postfix character and
  // try again, walking through the alphabet for as long as the conflict persists.
  const disambiguationStartCharacter = 97; // a
  let proposedId = safeBaseId;
  let count = 0;
  while (takenIds.has(proposedId)) {
    // We've wrapped around to 'a' without finding a unique entry: lengthen the
    // base by an 'a' and start over.
    if (count !== 0 && count % 26 === 0) {
      safeBaseId = safeBaseId + String.fromCharCode(disambiguationStartCharacter);
    }

    const postfix = String.fromCharCode(disambiguationStartCharacter + (count % 26));
    proposedId = safeBaseId + postfix;
    count++;
  }
  return proposedId;
}

/**
 * Suggests a bibliographic identifier based upon the source.
 *
 * The id is built from a lower-cased leading part (first author's family name
 * or literal name; otherwise the start of the short title or title; otherwise
 * the item type) followed by the year of issue when one is present, and is
 * then made unique against `existingIds`.
 *
 * @param existingIds Ids that are already taken.
 * @param csl The CSL item to suggest an id for.
 * @returns A unique citation id.
 */
export function suggestCiteId(existingIds: string[], csl: CSL): string {
  // Try to get the last name
  let citeIdLeading = '';
  const firstAuthor = csl.author && csl.author.length > 0 ? csl.author[0] : undefined;
  if (firstAuthor) {
    if (firstAuthor.family) {
      citeIdLeading = firstAuthor.family;
    } else if (firstAuthor.literal) {
      citeIdLeading = firstAuthor.literal;
    }
  }

  // If we can't use author information, try using the short title,
  // the title, or perhaps the type to construct the leading part of the id.
  if (citeIdLeading.length === 0) {
    const shortTitle = csl['short-title'];
    if (shortTitle && shortTitle.length > 0) {
      citeIdLeading = shortTitle.slice(0, kCiteIdLeadingLength);
    } else if (csl.title) {
      citeIdLeading = csl.title.slice(0, kCiteIdLeadingLength);
    } else {
      citeIdLeading = csl.type;
    }
  }

  // Try to get the publication year. Sometimes data arrives with a null
  // (or missing) value; ignore those.
  let datePart = '';
  const yearIssued = csl.issued?.['date-parts']?.[0]?.[0];
  if (yearIssued) {
    datePart = `${yearIssued}`;
  }

  // Create a deduplicated string against the existing entries
  let baseId = `${citeIdLeading.toLowerCase()}${datePart}`;
  if (baseId.length === 0) {
    baseId = 'untitled';
  }

  return createUniqueCiteId(existingIds, baseId);
}

/** A name/value pair shown in the citation preview. */
export interface CiteField {
  name: string;
  value: string;
}

/**
 * Returns a URL for the citation: its `URL` field when set, otherwise a URL
 * built from its `DOI` via `urlForDOI`.
 *
 * @returns The URL, or `undefined` when the item has neither a URL nor a DOI.
 */
export function urlForCitation(csl: CSL): string | undefined {
  if (csl.URL) {
    return csl.URL;
  }
  if (csl.DOI) {
    return urlForDOI(csl.DOI);
  }
  return undefined;
}

/**
 * Builds the list of fields shown when previewing a citation.
 *
 * Title, authors, issue date, publication, volume and page(s) come first, in
 * that order; every remaining string-valued field not listed in
 * `kFilteredFields` follows, using its capitalized key as the display name.
 */
export function formatForPreview(csl: CSL): CiteField[] {
  const pairs: CiteField[] = [];
  if (csl.title) {
    pairs.push({ name: 'Title', value: csl.title });
  }
  pairs.push({ name: 'Authors', value: formatAuthors(csl.author, 255) });
  if (csl.issued && isValidDate(csl.issued)) {
    pairs.push({ name: 'Issue Date', value: formatIssuedDate(csl.issued) });
  }

  const containerTitle = csl['container-title'];
  if (containerTitle) {
    pairs.push({ name: 'Publication', value: containerTitle });
  }

  if (csl.volume) {
    pairs.push({ name: 'Volume', value: csl.volume });
  }

  if (csl.page) {
    pairs.push({ name: 'Page(s)', value: csl.page });
  }

  // View the item as a plain dictionary so that arbitrary keys can be read
  const cslAny = csl as { [key: string]: any };
  for (const key of Object.keys(csl)) {
    if (kFilteredFields.includes(key)) {
      continue;
    }
    const value = cslAny[key];
    // Don't display complex fields or fields that aren't strings
    if (typeof value === 'string') {
      // Capitalize preview names
      const name = key.charAt(0).toUpperCase() + key.slice(1);
      pairs.push({ name, value });
    }
  }

  return pairs;
}

// Fields that are either displayed explicitly by formatForPreview or are
// never shown in the preview
const kFilteredFields = [
  'id',
  'title',
  'author',
  'issued',
  'container-title',
  'volume',
  'page',
  'abstract',
  'provider',
];

// Sometimes, data arrives with a null value.
// This function validates that the year (required) of every date part is
// present, i.e. neither null nor missing.
function isValidDate(date: CSLDate): boolean {
  const dateParts = date['date-parts'];
  if (dateParts) {
    return !dateParts.some(datePart => datePart[0] === null || datePart[0] === undefined);
  }
  return true;
}

// TODO: Needs to support localization of the templated strings
const kEtAl = 'et al.';

/**
 * Formats a list of authors as a single string such as
 * `Family, G, Other, H, and Third, I`.
 *
 * Each author is rendered as its literal name, or `Family, G` (family name plus
 * the first character of the given name), or the family name alone. Authors
 * for which none of these is available are skipped.
 *
 * @param authors The authors to format.
 * @param maxLength Optional maximum length. When the next author would exceed
 *   it, the remaining authors are replaced by `et al.`; a first author that is
 *   itself too long is cut and terminated with an ellipsis.
 * @returns The formatted string, or `''` when there is nothing to format.
 */
export function formatAuthors(authors?: CSLName[], maxLength?: number): string {
  // No author(s) specified
  if (!authors || authors.length === 0) {
    return '';
  }

  const names = authors
    .map(author => {
      if (author.literal?.length) {
        return author.literal;
      } else if (author.given?.length && author.family?.length) {
        // Family and Given name
        return `${author.family}, ${author.given.substring(0, 1)}`;
      } else if (author.family?.length) {
        // Family name only
        return `${author.family}`;
      } else {
        return '';
      }
    })
    // Unnamed authors would otherwise leave dangling separators ("A, and ")
    .filter(name => name.length > 0);

  if (names.length === 0) {
    return '';
  }

  // The first author; too long, truncate. This is handled outside the
  // accumulation below so that the truncation is actually applied.
  let formatted = names[0];
  if (maxLength && formatted.length > maxLength) {
    formatted = `${formatted.substring(0, maxLength - 1)}…`;
  }

  for (let i = 1; i < names.length; i++) {
    // Ignore any additional authors once the string has reached the maximum
    // length or has already been closed off with 'et al.'
    if ((maxLength && formatted.length >= maxLength) || formatted.endsWith(kEtAl)) {
      break;
    }
    // The last author is joined with 'and'; middle authors with a comma
    const separator = i === names.length - 1 ? ', and ' : ', ';
    formatted = addAuthorOrEtAl(formatted, `${formatted}${separator}${names[i]}`, maxLength);
  }
  return formatted;
}

// Returns newAuthorStr if it fits within maxLength, otherwise the previous
// author string closed off with 'et al.'
function addAuthorOrEtAl(previousAuthorStr: string, newAuthorStr: string, maxLength?: number): string {
  // if adding the string would make it too long, truncate
  if (maxLength && newAuthorStr.length > maxLength) {
    return etAl(previousAuthorStr, maxLength);
  }
  return newAuthorStr;
}

// Appends 'et al.' to authorStr, shortening authorStr when the result would
// not fit within maxLength
function etAl(authorStr: string, maxLength: number): string {
  // First try just using et al., then shorten the existing
  // author string to accommodate it
  const etAlStr = `${authorStr} ${kEtAl}`;
  if (!maxLength || etAlStr.length <= maxLength) {
    return etAlStr;
  }

  // First try to truncate to a space. lastIndexOf reports -1 when there is no
  // space at all, so the index has to be compared rather than tested for
  // truthiness.
  const lastSpace = authorStr.lastIndexOf(' ');
  if (lastSpace > 0) {
    return `${authorStr.slice(0, lastSpace)} ${kEtAl}`;
  }

  // As a last resort, truncate with an ellipsis, leaving room for the
  // ellipsis, the separating space and 'et al.' itself
  const keepLength = Math.max(0, maxLength - kEtAl.length - 2);
  return `${authorStr.slice(0, keepLength)}… ${kEtAl}`;
}

// Formats the year (first element) of a single date part; empty when the year
// is null or missing
function formatYear(datePart: ReadonlyArray<unknown> | undefined): string {
  const year = datePart ? datePart[0] : undefined;
  return year ? String(year) : '';
}

// TODO: Needs to support localization of the templated strings
/**
 * Formats the issue date of a citation as a year (`2012`) or, for a date
 * range, as two years joined by a dash (`2012-2014`).
 *
 * @param date The issued date, if any.
 * @returns The formatted year(s), or `''` when there is no usable date.
 */
export function formatIssuedDate(date: CSLDate | undefined): string {
  // No issue date for this
  if (!date) {
    return '';
  }

  const dateParts = date['date-parts'];
  if (!dateParts) {
    return '';
  }

  switch (dateParts.length) {
    // There is a date range; a year that is null or missing is left out
    case 2:
      return [formatYear(dateParts[0]), formatYear(dateParts[1])].filter(year => year.length > 0).join('-');

    // Only a single date
    // Note that it is possible to receive an entry with a single null entry
    // For example:
    // 10.1163/1874-6772_seg_a44_588
    case 1:
      return formatYear(dateParts[0]);

    // Seems like a malformed date :(
    default:
      return '';
  }
}

/**
 * Computes what the insert-citation UI displays for a work: a suggested id and
 * the preview fields.
 *
 * @param citeProps Dialog properties holding the CSL item and the existing ids.
 * @returns The suggested id and preview fields; both empty when no CSL item
 *   was supplied.
 */
export function citeUI(citeProps: InsertCiteProps): InsertCiteUI {
  if (!citeProps.csl) {
    // This should never happen - this function should always be called with a work
    return {
      suggestedId: '',
      previewFields: [],
    };
  }

  return {
    suggestedId: suggestCiteId(citeProps.existingIds, citeProps.csl),
    previewFields: formatForPreview(citeProps.csl),
  };
}