1#  File src/library/graphics/R/hist.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2018 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19hist <- function(x, ...) UseMethod("hist")
20
21hist.default <-
22    function (x, breaks = "Sturges", freq = NULL,
23	      probability = !freq, include.lowest= TRUE,
24	      right = TRUE, density = NULL, angle = 45,
25	      col = "lightgray", border = NULL,
26	      main = paste("Histogram of", xname),
27	      xlim = range(breaks), ylim = NULL,
28	      xlab = xname, ylab,
29	      axes = TRUE, plot = TRUE, labels = FALSE, nclass = NULL,
30	      warn.unused = TRUE, ...)
31{
32    if (!is.numeric(x))
33	stop("'x' must be numeric")
34    xname <- paste(deparse(substitute(x), 500), collapse="\n")
35    n <- length(x <- x[is.finite(x)])
36    n <- as.integer(n)
37    if(is.na(n)) stop("invalid length(x)")
38    use.br <- !missing(breaks)
39    if(use.br) {
40	if(!missing(nclass))
41	    warning("'nclass' not used when 'breaks' is specified")
42    }
43    else if(!is.null(nclass) && length(nclass) == 1L)
44	breaks <- nclass
45    use.br <- use.br && (nB <- length(breaks)) > 1L
46    if(use.br)
47	breaks <- sort(breaks)
48    else {				# construct vector of breaks
49	if(!include.lowest) {
50	    include.lowest <- TRUE
51	    warning("'include.lowest' ignored as 'breaks' is not a vector")
52	}
53	if(is.character(breaks)) {
54	    breaks <- match.arg(tolower(breaks),
55				c("sturges", "fd",
56				  "freedman-diaconis", "scott"))
57	    breaks <- switch(breaks,
58			     sturges = nclass.Sturges(x),
59			     "freedman-diaconis" =,
60			     fd = nclass.FD(x),
61			     scott = nclass.scott(x),
62			     stop("unknown 'breaks' algorithm"))
63	} else if(is.function(breaks)) {
64	    breaks <- breaks(x)
65	}
66        ## if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L)
67        ##     stop("invalid number of 'breaks'")
68        ## breaks <- pretty (range(x), n = breaks, min.n = 1)
69        ## nB <- length(breaks)
70        ## if(nB <= 1) ##-- Impossible !
71        ##     stop(gettextf("hist.default: pretty() error, breaks=%s",
72        ##                   format(breaks)), domain = NA)
73        if (length(breaks) == 1) {
74            if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L)
75                stop("invalid number of 'breaks'")
76	    if(breaks > 1e6) { # pretty() must have n <= maximal integer
77		warning(gettextf("'breaks = %g' is too large and set to 1e6",
78				 breaks), domain = NA)
79		breaks <- 1e6L
80	    }
81            breaks <- pretty (range(x), n = breaks, min.n = 1)
82            nB <- length(breaks)
83            if(nB <= 1) ##-- Impossible !
84                stop(gettextf("hist.default: pretty() error, breaks=%s",
85                              format(breaks)), domain = NA)
86        }
87        else {
88            if(!is.numeric(breaks) || length(breaks) <= 1)
89                stop(gettextf("Invalid breakpoints produced by 'breaks(x)': %s",
90                              format(breaks)), domain = NA)
91            breaks <- sort(breaks)
92            nB <- length(breaks)
93            use.br <- TRUE # To allow equidist=FALSE below (FIXME: Find better way?)
94        }
95    }
96    nB <- as.integer(nB)
97    if(is.na(nB)) stop("invalid length(breaks)")
98
99    ## Do this *before* adding fuzz or logic breaks down...
100
101    h <- as.double(diff(breaks))
102    equidist <- !use.br || diff(range(h)) < 1e-7 * mean(h)
103    if (!use.br && any(h <= 0))
104	stop("'breaks' are not strictly increasing")
105    freq1 <- freq # we want to do missing(freq) later
106    if (is.null(freq)) {
107	freq1 <- if(!missing(probability)) !as.logical(probability) else equidist
108    } else if(!missing(probability) && any(probability == freq))
109	stop("'probability' is an alias for '!freq', however they differ.")
110
111    ## Fuzz to handle cases where points are "effectively on"
112    ## the boundaries
113    ## As one break point could be very much larger than the others,
114    ## as from 1.9.1 we no longer use the range. (PR#6931)
115    ## diddle <- 1e-7 * max(abs(range(breaks)))  ## NB: h == diff(breaks)
116    diddle <- 1e-7 * if(nB > 5) stats::median(h)
117    ## for few breaks, protect against very large bins:
118		     else if(nB <= 3) diff(range(x)) else min(h[h > 0])
119    fuzz <- if(right)
120	c(if(include.lowest) -diddle else diddle, rep.int(diddle, nB - 1L))
121    else
122	c(rep.int(-diddle, nB - 1L), if(include.lowest) diddle else -diddle)
123    fuzzybreaks <- breaks + fuzz
124    ## With the fuzz adjustment above, the "right" and "include"
125    ## arguments are often irrelevant (but not with integer data!)
126    counts <- .Call(C_BinCount, x, fuzzybreaks, right, include.lowest)
127    if (any(counts < 0L))
128	stop("negative 'counts'. Internal Error.", domain = NA)
129    if (sum(counts) < n)
130	stop("some 'x' not counted; maybe 'breaks' do not span range of 'x'")
131    dens <- counts/(n*h) # use un-fuzzed intervals
132    mids <- 0.5 * (breaks[-1L] + breaks[-nB])
133    r <- structure(list(breaks = breaks, counts = counts,
134			density = dens, mids = mids,
135			xname = xname, equidist = equidist),
136		   class = "histogram")
137    if (plot) {
138	plot(r, freq = freq1, col = col, border = border,
139	     angle = angle, density = density,
140	     main = main, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab,
141	     axes = axes, labels = labels, ...)
142	invisible(r)
143    }
144    else { ## plot is FALSE
145    	if (warn.unused) {
146	    ## make an effort to warn about "non sensical" arguments:
147	    nf <- names(formals()) ## all formals but those:
148	    nf <- nf[is.na(match(nf, c("x", "breaks", "nclass", "plot",
149				       "include.lowest", "right")))]
150	    missE <- lapply(nf, function(n)
151			    substitute(missing(.), list(. = as.name(n))))
152	    not.miss <- ! sapply(missE, eval, envir = environment())
153	    if(any(not.miss))
154		warning(sprintf(ngettext(sum(not.miss),
155					 "argument %s is not made use of",
156					 "arguments %s are not made use of"),
157				paste(sQuote(nf[not.miss]), collapse=", ")),
158			domain = NA)
159	}
160        r
161    }
162}
163
164plot.histogram <-
165    function (x, freq = equidist, density = NULL, angle = 45,
166	      col = NULL, border = par("fg"), lty = NULL,
167	      main = paste("Histogram of", paste(x$xname, collapse="\n")),
168              sub = NULL,
169	      xlab = x$xname, ylab,
170	      xlim = range(x$breaks), ylim = NULL,
171	      axes = TRUE, labels = FALSE, add = FALSE, ann = TRUE, ...)
172{
173    equidist <-
174	if(is.logical(x$equidist)) x$equidist
175	else { h <- diff(x$breaks) ; diff(range(h)) < 1e-7 * mean(h) }
176    if(freq && !equidist)
177	warning("the AREAS in the plot are wrong -- rather use 'freq = FALSE'")
178
179    y <- if (freq) x$counts else x$density
180    nB <- length(x$breaks)
181    if(is.null(y) || 0L == nB) stop("'x' is wrongly structured")
182
183    dev.hold(); on.exit(dev.flush())
184    if(!add) {
185	if(is.null(ylim))
186	    ylim <- range(y, 0)
187	if (missing(ylab))
188	    ylab <- if (!freq) "Density" else "Frequency"
189	plot.new()
190	plot.window(xlim, ylim, "", ...)	#-> ylim's default from 'y'
191	if(ann) title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...)
192	if(axes) {
193	    axis(1, ...)
194	    axis(2, ...)
195	}
196    }
197    rect(x$breaks[-nB], 0, x$breaks[-1L], y,
198	 col = col, border = border,
199	 angle = angle, density = density, lty = lty)
200    if((logl <- is.logical(labels) && labels) || is.character(labels))
201	text(x$mids, y,
202	     labels = if(logl) {
203		 if(freq) x$counts else round(x$density,3)
204	     } else labels,
205	     adj = c(0.5, -0.5))
206    invisible()
207}
208
209lines.histogram <- function(x, ...) plot.histogram(x, ..., add = TRUE)
210
211