# Convert the iTunes library with
2# xsltproc itunes.xsl ~/Music/iTunes/iTunes\ Music\ Library.xml > itunes.xml
3# or with Sxslt
4#
5#   user  system elapsed
6#  7.514   0.090   7.981
# Time the Sxslt route: load the package, apply the stylesheet from within R,
# and read the text of every grandchild of the resulting document's root.
# Loading the package is inside the timed expression, so it is included in
# the reported time.
system.time({
  library(Sxslt)
  doc <- xsltApplyStyleSheet("~/itunes.xml", "~/Projects/org/omegahat/XML/RS/examples/itunes.xsl")
    # The root is taken from doc$doc; doc itself remains assigned.
  top <- xmlRoot(doc$doc)
    # One result per child of the root, holding the values of that child's
    # own children.
  songs.xsl <- xmlApply(top, function(song) xmlSApply(song, xmlValue))
})
13
14
15#####################
16
# As tempting as it is to wrap the parse in this next command directly in
# xmlRoot() (so that only the root node, and not the document, is kept),
# that will allow the XML document to be freed and then a crash will ensue.
19
20doc = xmlInternalTreeParse("~/Projects/org/omegahat/XML/RS/examples/itunes.xml")
21# fields = unique(unlist(xmlApply(top, names)))
22songs = xmlApply(xmlRoot(doc), function(x) xmlSApply(x, xmlValue))
23
24########################
# Working from the original format of /plist/dict/dict/dict/
26doc = xmlInternalTreeParse("~/itunes.xml")
27dicts = doc["/plist/dict/dict/dict"]
28
# Turn one <dict> node into a named vector of its values.
#
# The children of the node are assumed to alternate key, value, key,
# value, ...: the odd-numbered children supply the names and the child
# following each one supplies the corresponding value.
#
#   dict   an XML node whose children are to be paired up.
#
# Returns the values (as given by xmlSApply(dict, xmlValue)) named by their
# keys, with each space in a key replaced by an underscore, e.g.
# "Bit Rate" becomes "Bit_Rate".
#
# Note that this definition masks base::transform() for the session.
transform <-
function(dict)
{
  vals <- xmlSApply(dict, xmlValue)
    # Positions of the keys: 1, 3, 5, ...  Only complete key/value pairs are
    # counted (integer division), so a trailing key without a value is
    # dropped rather than being paired with NA.
  i <- seq(1, by = 2, length.out = length(vals) %/% 2)
  structure(vals[i + 1], names = gsub(" ", "_", vals[i], fixed = TRUE))
}
36
37songs = lapply(dicts, transform)
38
39
40# For reading, xpath and lapply()
41#   user  system elapsed
42#   6.784   0.073   7.153
43
44##########################################
45
46
# Extract one field from every song, giving one element per song.
# "[" is used rather than "[[": on a named vector, "[[" with a name that is
# not present is an error ("subscript out of bounds"), whereas "[" gives NA.
# A song that lacks the field therefore shows up as NA instead of aborting
# the whole computation.
#
#   name   the field to extract, e.g. "Album".
#   from   the list of songs to extract it from; defaults to all of songs.
songField <- function(name, from = songs) sapply(from, "[", name)

# The same field converted to numeric.
songNumber <- function(name, from = songs) as.numeric(songField(name, from))


# distribution of bit rates for sampling of the sound.
table(songNumber("Bit_Rate"))

  # How often each song was played.
hist(songNumber("Play_Count"))


# Distribution of the number of songs on each album
hist(table(songField("Album")))


# Year song was recorded (?)
hist(songNumber("Year"))


# Song length (Total_Time; presumably in milliseconds, given the division
# by 1000 in the album total just below -- confirm)
hist(songNumber("Total_Time"))

# Album time: the sum over each album's songs of Total_Time / 1000.
album.time <- tapply(songs, songField("Album"),
                     function(tracks) sum(songNumber("Total_Time", tracks) / 1000))


# NOTE(review): strptime() is given no tz, so the timestamps are interpreted
# in the current time zone.  If the library stores them in UTC, pass
# tz = "UTC" -- confirm against the data.
dateAdded <- as.POSIXct(strptime(songField("Date_Added"), "%Y-%m-%dT%H:%M:%S"))
 #XXX
hist(as.numeric(dateAdded))


 # Artists with most songs (at most the top 40).  head() is used rather than
 # [1:40] so that fewer than 40 artists does not pad the result with NAs.
head(sort(table(songField("Artist")), decreasing = TRUE), 40)


 # How many songs on single and double "albums"
table(songField("Disc_Number"))



table(songField("Kind"))

table(songField("Genre"))


 # Check the sampling rate for points off the line.
plot(songNumber("Total_Time"), songNumber("Size"))
90