# Convert the iTunes library with the itunes.xsl stylesheet, either from the
# shell:
#   xsltproc itunes.xsl ~/Music/iTunes/iTunes\ Music\ Library.xml > itunes.xml
# or with Sxslt:
#
#    user  system elapsed
#   7.514   0.090   7.981
system.time({
  library(Sxslt)
  doc <- xsltApplyStyleSheet("~/itunes.xml", "~/Projects/org/omegahat/XML/RS/examples/itunes.xsl")
  top <- xmlRoot(doc$doc)
  # One entry per child of the root; each entry holds the text values of that
  # child's own children.
  songs.xsl <- xmlApply(top, function(x) xmlSApply(x, xmlValue))
})


#####################

# The parsing and accessor functions used below (xmlInternalTreeParse, xmlRoot,
# xmlApply, xmlSApply, xmlValue) come from the XML package. Attach it
# explicitly so this section also runs when the Sxslt section above is skipped.
library(XML)

# As tempting as it is to take the xmlRoot() in this next command,
# that will allow the XML document to be freed and then a crash will ensue.

doc <- xmlInternalTreeParse("~/Projects/org/omegahat/XML/RS/examples/itunes.xml")
# fields = unique(unlist(xmlApply(top, names)))
songs <- xmlApply(xmlRoot(doc), function(x) xmlSApply(x, xmlValue))

########################
# Working from the original format of /plist/dict/dict/dict/
doc <- xmlInternalTreeParse("~/itunes.xml")
dicts <- doc["/plist/dict/dict/dict"]

# Turn one <dict> node into a named vector.
#
# The text values of the node's children are read in document order and
# treated as alternating key, value, key, value, ...: odd positions supply the
# names (with spaces replaced by underscores) and even positions the values.
#
# dict: a node whose children alternate between key and value elements.
# Returns the values at the even positions, named by the preceding keys.
transform <-
function(dict)
{
  vals <- xmlSApply(dict, xmlValue)
  # Positions of the keys: 1, 3, 5, ...
  i <- seq(1, by = 2, length.out = length(vals) / 2)
  structure(vals[i + 1], names = gsub(" ", "_", vals[i]))
}

songs <- lapply(dicts, transform)


# For reading, xpath and lapply()
#    user  system elapsed
#   6.784   0.073   7.153

##########################################


# Distribution of bit rates for sampling of the sound.
# "[" (rather than "[[") is used so that a record lacking the field contributes
# NA instead of stopping with "subscript out of bounds"; this also matches the
# extraction style used by the rest of the script.
table(as.numeric(sapply(songs, "[", "Bit_Rate")))

# How often each song was played.
hist(as.numeric(sapply(songs, "[", "Play_Count")))


# Number of songs on each album
hist(table(sapply(songs, "[", "Album")))


# Year song was recorded (?)
hist(as.numeric(sapply(songs, "[", "Year")))


# Song length (the Total_Time field)
hist(as.numeric(sapply(songs, "[", "Total_Time")))

# Album time: sum of Total_Time over the songs of each album, divided by 1000
# (presumably converting milliseconds to seconds -- confirm the field's units).
album.time <- tapply(songs, sapply(songs, "[", "Album"), function(x) sum(as.numeric(sapply(x, "[", "Total_Time")) / 1000))


# NOTE(review): no tz is passed to strptime(), so the timestamps are
# interpreted in the session's local time zone -- confirm whether the data are
# actually recorded in UTC.
dateAdded <- as.POSIXct(strptime(sapply(songs, "[", "Date_Added"), "%Y-%m-%dT%H:%M:%S"))
#XXX
hist(as.numeric(dateAdded))


# Artists with most songs.
# head() is used instead of [1:40] so that a library with fewer than 40
# distinct artists does not get padded with NA entries.
head(sort(table(sapply(songs, "[", "Artist")), decreasing = TRUE), 40)


# How many songs on single and double "albums"
table(sapply(songs, "[", "Disc_Number"))



table(sapply(songs, "[", "Kind"))

table(sapply(songs, "[", "Genre"))


# Check the sampling rate for points off the line.
plot(as.numeric(sapply(songs, "[", "Total_Time")), as.numeric(sapply(songs, "[", "Size")))