1# File src/library/graphics/R/hist.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2018 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19hist <- function(x, ...) UseMethod("hist") 20 21hist.default <- 22 function (x, breaks = "Sturges", freq = NULL, 23 probability = !freq, include.lowest= TRUE, 24 right = TRUE, density = NULL, angle = 45, 25 col = "lightgray", border = NULL, 26 main = paste("Histogram of", xname), 27 xlim = range(breaks), ylim = NULL, 28 xlab = xname, ylab, 29 axes = TRUE, plot = TRUE, labels = FALSE, nclass = NULL, 30 warn.unused = TRUE, ...) 31{ 32 if (!is.numeric(x)) 33 stop("'x' must be numeric") 34 xname <- paste(deparse(substitute(x), 500), collapse="\n") 35 n <- length(x <- x[is.finite(x)]) 36 n <- as.integer(n) 37 if(is.na(n)) stop("invalid length(x)") 38 use.br <- !missing(breaks) 39 if(use.br) { 40 if(!missing(nclass)) 41 warning("'nclass' not used when 'breaks' is specified") 42 } 43 else if(!is.null(nclass) && length(nclass) == 1L) 44 breaks <- nclass 45 use.br <- use.br && (nB <- length(breaks)) > 1L 46 if(use.br) 47 breaks <- sort(breaks) 48 else { # construct vector of breaks 49 if(!include.lowest) { 50 include.lowest <- TRUE 51 warning("'include.lowest' ignored as 'breaks' is not a vector") 52 } 53 if(is.character(breaks)) { 54 breaks <- match.arg(tolower(breaks), 55 c("sturges", "fd", 56 "freedman-diaconis", "scott")) 57 breaks <- switch(breaks, 58 sturges = nclass.Sturges(x), 59 "freedman-diaconis" =, 60 fd = nclass.FD(x), 61 scott = nclass.scott(x), 62 stop("unknown 'breaks' algorithm")) 63 } else if(is.function(breaks)) { 64 breaks <- breaks(x) 65 } 66 ## if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L) 67 ## stop("invalid number of 'breaks'") 68 ## breaks <- pretty (range(x), n = breaks, min.n = 1) 69 ## nB <- length(breaks) 70 ## if(nB <= 1) ##-- Impossible ! 71 ## stop(gettextf("hist.default: pretty() error, breaks=%s", 72 ## format(breaks)), domain = NA) 73 if (length(breaks) == 1) { 74 if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L) 75 stop("invalid number of 'breaks'") 76 if(breaks > 1e6) { # pretty() must have n <= maximal integer 77 warning(gettextf("'breaks = %g' is too large and set to 1e6", 78 breaks), domain = NA) 79 breaks <- 1e6L 80 } 81 breaks <- pretty (range(x), n = breaks, min.n = 1) 82 nB <- length(breaks) 83 if(nB <= 1) ##-- Impossible ! 84 stop(gettextf("hist.default: pretty() error, breaks=%s", 85 format(breaks)), domain = NA) 86 } 87 else { 88 if(!is.numeric(breaks) || length(breaks) <= 1) 89 stop(gettextf("Invalid breakpoints produced by 'breaks(x)': %s", 90 format(breaks)), domain = NA) 91 breaks <- sort(breaks) 92 nB <- length(breaks) 93 use.br <- TRUE # To allow equidist=FALSE below (FIXME: Find better way?) 94 } 95 } 96 nB <- as.integer(nB) 97 if(is.na(nB)) stop("invalid length(breaks)") 98 99 ## Do this *before* adding fuzz or logic breaks down... 100 101 h <- as.double(diff(breaks)) 102 equidist <- !use.br || diff(range(h)) < 1e-7 * mean(h) 103 if (!use.br && any(h <= 0)) 104 stop("'breaks' are not strictly increasing") 105 freq1 <- freq # we want to do missing(freq) later 106 if (is.null(freq)) { 107 freq1 <- if(!missing(probability)) !as.logical(probability) else equidist 108 } else if(!missing(probability) && any(probability == freq)) 109 stop("'probability' is an alias for '!freq', however they differ.") 110 111 ## Fuzz to handle cases where points are "effectively on" 112 ## the boundaries 113 ## As one break point could be very much larger than the others, 114 ## as from 1.9.1 we no longer use the range. (PR#6931) 115 ## diddle <- 1e-7 * max(abs(range(breaks))) ## NB: h == diff(breaks) 116 diddle <- 1e-7 * if(nB > 5) stats::median(h) 117 ## for few breaks, protect against very large bins: 118 else if(nB <= 3) diff(range(x)) else min(h[h > 0]) 119 fuzz <- if(right) 120 c(if(include.lowest) -diddle else diddle, rep.int(diddle, nB - 1L)) 121 else 122 c(rep.int(-diddle, nB - 1L), if(include.lowest) diddle else -diddle) 123 fuzzybreaks <- breaks + fuzz 124 ## With the fuzz adjustment above, the "right" and "include" 125 ## arguments are often irrelevant (but not with integer data!) 126 counts <- .Call(C_BinCount, x, fuzzybreaks, right, include.lowest) 127 if (any(counts < 0L)) 128 stop("negative 'counts'. Internal Error.", domain = NA) 129 if (sum(counts) < n) 130 stop("some 'x' not counted; maybe 'breaks' do not span range of 'x'") 131 dens <- counts/(n*h) # use un-fuzzed intervals 132 mids <- 0.5 * (breaks[-1L] + breaks[-nB]) 133 r <- structure(list(breaks = breaks, counts = counts, 134 density = dens, mids = mids, 135 xname = xname, equidist = equidist), 136 class = "histogram") 137 if (plot) { 138 plot(r, freq = freq1, col = col, border = border, 139 angle = angle, density = density, 140 main = main, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, 141 axes = axes, labels = labels, ...) 142 invisible(r) 143 } 144 else { ## plot is FALSE 145 if (warn.unused) { 146 ## make an effort to warn about "non sensical" arguments: 147 nf <- names(formals()) ## all formals but those: 148 nf <- nf[is.na(match(nf, c("x", "breaks", "nclass", "plot", 149 "include.lowest", "right")))] 150 missE <- lapply(nf, function(n) 151 substitute(missing(.), list(. = as.name(n)))) 152 not.miss <- ! sapply(missE, eval, envir = environment()) 153 if(any(not.miss)) 154 warning(sprintf(ngettext(sum(not.miss), 155 "argument %s is not made use of", 156 "arguments %s are not made use of"), 157 paste(sQuote(nf[not.miss]), collapse=", ")), 158 domain = NA) 159 } 160 r 161 } 162} 163 164plot.histogram <- 165 function (x, freq = equidist, density = NULL, angle = 45, 166 col = NULL, border = par("fg"), lty = NULL, 167 main = paste("Histogram of", paste(x$xname, collapse="\n")), 168 sub = NULL, 169 xlab = x$xname, ylab, 170 xlim = range(x$breaks), ylim = NULL, 171 axes = TRUE, labels = FALSE, add = FALSE, ann = TRUE, ...) 172{ 173 equidist <- 174 if(is.logical(x$equidist)) x$equidist 175 else { h <- diff(x$breaks) ; diff(range(h)) < 1e-7 * mean(h) } 176 if(freq && !equidist) 177 warning("the AREAS in the plot are wrong -- rather use 'freq = FALSE'") 178 179 y <- if (freq) x$counts else x$density 180 nB <- length(x$breaks) 181 if(is.null(y) || 0L == nB) stop("'x' is wrongly structured") 182 183 dev.hold(); on.exit(dev.flush()) 184 if(!add) { 185 if(is.null(ylim)) 186 ylim <- range(y, 0) 187 if (missing(ylab)) 188 ylab <- if (!freq) "Density" else "Frequency" 189 plot.new() 190 plot.window(xlim, ylim, "", ...) #-> ylim's default from 'y' 191 if(ann) title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...) 192 if(axes) { 193 axis(1, ...) 194 axis(2, ...) 195 } 196 } 197 rect(x$breaks[-nB], 0, x$breaks[-1L], y, 198 col = col, border = border, 199 angle = angle, density = density, lty = lty) 200 if((logl <- is.logical(labels) && labels) || is.character(labels)) 201 text(x$mids, y, 202 labels = if(logl) { 203 if(freq) x$counts else round(x$density,3) 204 } else labels, 205 adj = c(0.5, -0.5)) 206 invisible() 207} 208 209lines.histogram <- function(x, ...) plot.histogram(x, ..., add = TRUE) 210 211