# R语言基础--函数构建

## 函数的编写

``````函数名 <- function(数据,参数1=默认值,...)
{

return(返回值);
}
``````

#### 函数案例A

``````rcal <- function(x,y){
z <- x^2 + y^2
result <- sqrt(z)
return(result)
}
``````

``````rcal <- function(x,y){
+     z <- x^2 + y^2
+     result <- sqrt(z)
+     return(result)
+ }
rcal(3,4)
[1] 5
``````

#### 函数案例B

``````sqtest <- function(x,y){
z1 = x^2;z2=y^2;z3=z1+z2;
return(z3);
}
sqtest(3,4)
``````

``````sqtest <- function(x,y){
+     z1 = x^2;z2=y^2;z3=z1+z2;
+     return(z3);
+ }
sqtest(3,4)
[1] 25
``````

#### 函数案例C-转换百分比

``````addPercent <- function(x){
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
x <- c(0.011, 0.0022, 0.1234)
``````

``````addPercent <- function(x){
+   percent <- round(x*100, digits = 1)
+   result <- paste(percent, "%", setp="")
+   return(result)
+ }

x <- c(0.011, 0.0022, 0.1234)

[1] "1.1 % "  "0.2 % "  "12.3 % "
``````

``````addPercent <- function(x){
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
x <- c(0.011, 0.0022, 0.1234)
ppaste(x)
``````

``````addPercent <- function(x){
+   percent <- round(x*100, digits = 1)
+   result <- paste(percent, "%", setp="")
+   return(result)
+ }

x <- c(0.011, 0.0022, 0.1234)

ppaste(x)
[1] "1.1 % "  "0.2 % "  "12.3 % "
``````

``````ppaste
function(x){
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
``````

``````mean
function (x, ...)
UseMethod("mean")
<bytecode: 0x000000000baf0a08>
<environment: namespace:base>
sd
function (x, na.rm = FALSE)
sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x),
na.rm = na.rm))
<bytecode: 0x0000000004c04a38>
<environment: namespace:stats>
``````

``````addPercent <- function(x){
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
``````

``````addPercent <- function(x){
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
}
x <- c(0.011, 0.0022, 0.1234)
``````

``````if(!is.numeric(x)) return(NULL)，如下所示：

if(!is.numeric(x)) return(NULL)
percent <- round(x*100, digits = 1)
result <- paste(percent, "%", setp="")
}
x <- c(0.011, 0.0022, 0.1234)
y <- c("Hello, R")
``````

``````addPercent(x)
NULL
``````

``````odds <- function(x) x / (1 - x)
odds(0.8)
``````

``````odds <- function(x) x / (1 - x)
odds(0.8)
[1] 4
``````

``````addPercent <- function(x) paste(round(x * 100, digits = 1), "%", sep = "")
``````

``````addPercent <- function(x) paste(round(x * 100, digits = 1), "%", sep = "")
[1] "0.2%"
``````

``````percentages <- c(58.23, 120.4, 33)
``````

``````percentages <- c(58.23, 120.4, 33)
[1] "58.2%"  "120.4%" "33%"
``````

``````addPercent <- function(x, mult){
percent <- round(x*mult, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
percentages <- c(58.23, 120.4, 33)
decimalstages <- c(0.001, 0.123, 1.334)
``````

``````addPercent(percentages, mult = 1)
[1] "58.2 % "  "120.4 % " "33 % "
[1] "0.1 % "   "12.3 % "  "133.4 % "
``````

``````addPercent(decimalstages)
argument "mult" is missing, with no default
``````

``````addPercent <- function(x, mult=100){
percent <- round(x*mult, digits = 1)
result <- paste(percent, "%", setp="")
return(result)
}
decimalstages <- c(0.001, 0.123, 1.334)
``````

``````addPercent(decimalstages)
[1] "0.1 % "   "12.3 % "  "133.4 % "
``````

``````pheatmap(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name =
"RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60",
cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE,
cluster_cols = TRUE, clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean", clustering_method = "complete",
clustering_callback = identity2, cutree_rows = NA, cutree_cols = NA,
treeheight_row = ifelse((class(cluster_rows) == "hclust") || cluster_rows,
50, 0), treeheight_col = ifelse((class(cluster_cols) == "hclust") ||
cluster_cols, 50, 0), legend = TRUE, legend_breaks = NA,
legend_labels = NA, annotation_row = NA, annotation_col = NA,
annotation = NA, annotation_colors = NA, annotation_legend = TRUE,
annotation_names_row = TRUE, annotation_names_col = TRUE,
drop_levels = TRUE, show_rownames = T, show_colnames = T, main = NA,
fontsize = 10, fontsize_row = fontsize, fontsize_col = fontsize,
angle_col = c("270", "0", "45", "90", "315"), display_numbers = F,
number_format = "%.2f", number_color = "grey30", fontsize_number = 0.8
* fontsize, gaps_row = NULL, gaps_col = NULL, labels_row = NULL,
labels_col = NULL, filename = NA, width = NA, height = NA,
silent = FALSE, na_col = "#DDDDDD", ...)
``````

``````addPercent <- function(x, mult=100, ...){
percent <- round(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
decimalstages <- c(0.3331, 0.13323, 1.33334)
``````

``````addPercent(decimalstages, digits = 2)
[1] "33.31 % "  "13.32 % "  "133.33 % "
[1] "33 % "  "13 % "  "133 % "
``````

``````addPercent <- function(x, mult=100, FUN=round, ...){
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
``````

``````addPercent <- function(x, mult=100, FUN=round, ...){
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
x <- c(0.1223, 0.8956, 0.4234)
``````

`````` addPercent(x, FUN=signif, digits = 3)
[1] "12.2 % " "89.6 % " "42.3 % "
``````

R将signif()函数的代码传递给FUN参数，这样FUN()就成了signif()函数的一份拷贝，功能和行为都与之相同；
R接收参数digits并将其传递给FUN()。

``````addPercent <- function(x, mult=100, FUN=round, ...){
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
profits <- c(2100, 1430, 3580, 5230)
addPercent(profits, FUN = function(x) round(x / sum(x) *100))
``````

``````profits <- c(2100, 1430, 3580, 5230)
addPercent(profits, FUN = function(x) round(x / sum(x) *100))
[1] "17 % " "12 % " "29 % " "42 % "
``````

``````addPercent <- function(x, mult=100, FUN=round, ...){
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
profits <- c(2100, 1430, 3580, 5230)
``````

``````profits <- c(2100, 1430, 3580, 5230)
[1] "17 % " "12 % " "29 % " "42 % "
``````

``````round <- c(0.45, -0.45, 58.5)
``````

``````addPercent <- function(x, mult=100, FUN=round, ...){
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
``````

``````round <- c(0.45, -0.45, 58.5)
Error in FUN(x * mult, ...) : could not find function "FUN"
``````

``````addPercent <- function(x, mult=100, FUN=round, ...){
FUN <- match.fun(FUN)
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
round <- c(0.45, -0.45, 58.5)
``````

``````round <- c(0.45, -0.45, 58.5)
[1] "45 % "   "-45 % "  "5850 % "
``````

match.fun函数会查找与名称round相匹配的函数，并将代码复制给FUN，而不会找到round向量，另外，match.fun()还支持字符对象，例如FUN='round'这样传递参数也是有效的。

## 处理作用域

``````x <- 1:5
test <- function(x){
cat("This is x: ", x, "\n")
rm(x)
cat("This is x after removing it:", x, "\n")
}
x
test(5:1)
``````

``````x
[1] 1 2 3 4 5
test(5:1)
This is x:  5 4 3 2 1
This is x after removing it: 1 2 3 4 5
``````

### 使用内部函数

`test()`函数出现的调用全局环境对象的问题其实是没有意义的，因为我们从一开始就应该避免函数对全局环境对象的依赖。事实上，R背后隐藏的整个概念都不支持将全局变量应用到函数内部。R作为一门函数式编程语言，它的一个主要思想就是在于任何函数的输出结果都不能依赖于外部环境，而仅仅是由传入的参数决定。只要各个参数的值相同，结果就不会发生变化。这种操作的优势在于，有时候我们想要在某个函数内部重复地执行某种操作，但离开这个函数后，这一操作又是没有意义的。

``````calculate.eff <- function(x, y, control){
min.base <- function(z)  - mean(control)
min.base(x) / min.base(y)
}
``````

`ccalculate.eff()`函数内部，可以看到有另外一个函数的定义：`min.base()`，这个函数定义在`calculate.eff()`函数的本地环境中，并且也会在离开函数时销毁，也就是说，它并不存在于Workspace内。

``````calculate.eff <- function(x, y, control){
min.base <- function(z) z - mean(control)
min.base(x) / min.base(y)
}
half <- c(2.23, 3.23, 1.48)
full <- c(4.85, 4.995, 4.12)
nothing <- c(0.14, 0.18, 0.56, 0.23)
calculate.eff(half, full,nothing)
``````

``````half <- c(2.23, 3.23, 1.48)
full <- c(4.85, 4.995, 4.12)
nothing <- c(0.14, 0.18, 0.56, 0.23)
calculate.eff(half, full,nothing)

[1] 0.4270093 0.6258612 0.3129473
``````

1. 函数`calculate.eff()`创建了一个本地环境，包含对象`x`（其值为`fifty`），`y`（其值为`hundred`），`control`（值为`nothing`），以及函数`min.base()`
2. 函数`min.base()``calculate.eff()`函数内创建了一个新的本地环境，包含对象`z`，值为`x`
3. `min.base()``calculate.eff()`的环境内查找对象`control`，计算其中每个元素的平均值，并将`z`的每个元素减去这个平均值，之后返回结果；
4. 与前一个过程相同，只是`z`的值换成了`y`
5. `3``4`的结果相除，结果返回到全局环境。

### 方法分配

``````print
function (x, ...)
UseMethod("print")
``````

#### 通过`UseMethod`调用方法

`print()`仅靠那一行代码肯定是完成无法完成不同方式打印向量、数据框、列表等复杂任务的，真正完成的其实是`UseMethod()`这个函数，这个函数做的所有事情就是告诉R查找一个能够处理传入参数`x`类型相匹配的函数。R会完整地遍历定义的函数名称，查找以`print`形状，后面接一个点号加上对象类型名的函数。

``````apropos('print\\.')

[1] ".rs.rnbHooks.print.html"             ".rs.rnbHooks.print.htmlwidget"
[3] ".rs.rnbHooks.print.knit_asis"        ".rs.rnbHooks.print.knit_image_paths"
[5] ".rs.rnbHooks.print.shiny.tag"        ".rs.rnbHooks.print.shiny.tag.list"
[7] "print.AsIs"                          "print.by"
[9] "print.condition"                     "print.connection"
[11] "print.data.frame"                    "print.Date"
[13] "print.default"                       "print.difftime"
[15] "print.Dlist"                         "print.DLLInfo"
[17] "print.DLLInfoList"                   "print.DLLRegisteredRoutines"
[19] "print.eigen"                         "print.factor"
[21] "print.function"                      "print.hexmode"
[23] "print.libraryIQR"                    "print.listof"
[25] "print.NativeRoutineList"             "print.noquote"
[27] "print.numeric_version"               "print.octmode"
[29] "print.packageInfo"                   "print.POSIXct"
[31] "print.POSIXlt"                       "print.proc_time"
[33] "print.restart"                       "print.rle"
[35] "print.simple.list"                   "print.srcfile"
[37] "print.srcref"                        "print.summary.table"
[39] "print.summary.warnings"              "print.summaryDefault"
[41] "print.table"                         "print.warnings"
``````

`apropos()`括号内的引号之间可以添加正则表达式，这与`grep()`函数非常类似。假如我们要打印一个数据框，那么R将查找函数`print.data.frame()`，并使用它来打印作为参数传入的对象，我们可以手工调用这个函数，如下所示：

``````small.one <- data.frame(a = 1:2, b = 2:1)
small.one
print(small.one)
print.data.frame(small.one)
``````

``````small.one <- data.frame(a = 1:2, b = 2:1)
small.one

a b
1 1 2
2 2 1
print(small.one)

a b
1 1 2
2 2 1

print.data.frame(small.one)

a b
1 1 2
2 2 1
``````

#### 使用默认方法

``````small.one <- data.frame(a = 1:2, b = 2:1)
print.default(small.one)
``````

``````small.one <- data.frame(a = 1:2, b = 2:1)
print.default(small.one)

\$a
[1] 1 2
\$b
[1] 2 1

attr(,"class")

[1] "data.frame"
``````

#### 实现自己的通用函数

``````addPercent <- function(x, mult=100, FUN=round, ...){
FUN <- match.fun(FUN)
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}
``````

``````addPercent.character <- function(x){
paste(x, "%", sep="")
}
``````

``````addPercent <- function(x, ...){
}
``````

``````addPercent.character <- function(x){
paste(x, "%", sep="")
}
addPercent.numeric <- function(x, mult=100, FUN=round, ...){
FUN <- match.fun(FUN)
percent <- FUN(x*mult, ...)
result <- paste(percent, "%", setp="")
return(result)
}

}

new.numbers <- c(0.82,0.022, 1.62, 0.4)
small.one <- data.frame(a = 1:2, b = 2:1)
small.one
``````

``````addPercent(new.numbers,FUN=floor)

[1] "82 % "  "2 % "   "162 % " "40 % "

[1] "a%" "b%" "c%" "d%" "e%" "f%"

small.one <- data.frame(a = 1:2, b = 2:1)
small.one

a b
1 1 2
2 2 1

no applicable method for 'addPercent' applied to an object of class "data.frame"
``````

``````addPercent.default <- function(x){

cat('You should try a numeric or character vector.\n')

}
``````

``````addPercent.character <- function(x){

paste(x, "%", sep="")

}

addPercent.numeric <- function(x, mult=100, FUN=round, ...){

FUN <- match.fun(FUN)

percent <- FUN(x*mult, ...)

result <- paste(percent, "%", setp="")

return(result)

}

}

cat('You should try a numeric or character vector.\n')

}

new.numbers <- c(0.82,0.022, 1.62, 0.4)

small.one <- data.frame(a = 1:2, b = 2:1)

small.one

``````

``````addPercent(new.numbers,FUN=floor)

[1] "82 % "  "2 % "   "162 % " "40 % "

[1] "a%" "b%" "c%" "d%" "e%" "f%"

small.one <- data.frame(a = 1:2, b = 2:1)

small.one

a b

1 1 2

2 2 1

You should try a numeric or character vector.
``````

## 函数返回多个结果

``````vms = function(x){

xx=rev(sort(x))

# 对向量x从小到大排序，然后用ver()转换为从大到小排序

xx=xx[1:5]

# 提取前5个元素

mean(xx)

#求均值

return(list(xbar=mean(xx),top5=xx))

}

``````

``````y <- c(12,4,65,22,33,123,322,90)
vms(y)
\$xbar
[1] 126.6

\$top5

[1] 322 123  90  65  33
``````

## 使用单变量求解函数方程

``````sales <- function(price) {100 - 0.5*price}
``````

``````revenue <- function(price) {price*sales(price)}
``````

``````sales <- function(price) {100 - 0.5*price}
revenue <- function(price) {price*sales(price)}
par(mfrow = c(1, 2))
curve(sales, from = 50, to = 150, xname="price", ylab= "Sales", main="Sales")
curve(revenue, from=50, to=150, xname="price", ylab="Revenue", main="Revenue")
par(mfrow=c(1,2))
``````

``````optimise(revenue, interval = c(50, 150), maximum = TRUE)
``````

``````optimise(revenue, interval = c(50, 150), maximum = TRUE)
\$maximum

[1] 100
\$objective
[1] 5000
``````

## `replicate`函数

`replicate`函数与`rep`函数类似，`rep`函数的功能是将一个参数重复数次，如下所示：

``````rep(1,4)

[1] 1 1 1 1

rep(c(1,4),3)

[1] 1 4 1 4 1 4
``````

`replicate`函数则是把某个表达式重复计算数次，多数情况下，它们的计算结果都相同，除非是使用了随机数时才有可能不同，如下所示：

``````replicate(5, runif(1))

[1] 0.7306532 0.8837189 0.5781437 0.2151454 0.4487271

rep(runif(1),5)

[1] 0.7360304 0.7360304 0.7360304 0.7360304 0.7360304

``````

`replicate`函数的这种功能在比较复杂的例子中使用很广，例如Monte Carlo个分析中。

``````time_for_commute <- function(){

# select communication tool

mode_of_transport <- sample(

c("car", "bus", "train", "bike"),

size = 1,

prob = c(0.1, 0.2, 0.3, 0.4)

)

time <- switch(

mode_of_transport,

car = rlnorm(1, log(30), 0.5),

bus = rlnorm(1, log(40), 0.5),

train = rnorm(1, 30, 10),

bike = rnorm(1, 60, 5))

names(time) <- mode_of_transport

time

}

replicate(5, time_for_commute())

``````

``````replicate(5, time_for_commute())

train      bus      bus     bike      bus

56.38663 53.76305 29.99244 60.87293 98.38907
``````

## 参考资料

1. R语言轻松入门与提高 [法]Andrie de Vries ,[比利时]Joris Mey [法] Andrie de Vries 著
2. 学习R.[美] Richard，Cotton 著刘军 译