# 用R语言的quantreg包进行分位数回归

## 什么是分位数回归

$y=\beta X+\epsilon$，$E(y)=\beta X$

$\min_{\xi \in \mathcal{R}} \sum \rho_{\tau}(y_i-\xi)$

## quantreg包

quantreg即：Quantile Regression，拥有条件分位数模型的估计和推断方法，包括线性、非线性和非参模型；处理单变量响应的条件分位数方法；处理删失数据的几种方法。此外，还包括基于预期风险损失的投资组合选择方法。

## 实例

library(quantreg) # 载入quantreg包

data(engel) # 加载quantreg包自带的数据集

fit1 = rq(foodexp ~ income, tau = 0.5, data = engel)

r1 = resid(fit1) # 得到残差序列，并赋值为变量r1

c1 = coef(fit1) # 得到模型的系数，并赋值给变量c1

summary(fit1) # 显示分位数回归的模型和系数



Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:

coefficients lower bd upper bd

(Intercept) 81.48225 53.25915 114.01156

income 0.56018 0.48702 0.60199

summary(fit1, se = "boot") # 通过设置参数se，可以得到系数的假设检验



Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:

Value Std. Error t value Pr(>|t|)

(Intercept) 81.48225 27.57092 2.95537 0.00344

income 0.56018 0.03507 15.97392 0.00000

fit1 = rq(foodexp ~ income, tau = 0.5, data = engel)

fit2 = rq(foodexp ~ income, tau = 0.75, data = engel)

anova(fit1,fit2) #方差分析



Quantile Regression Analysis of Deviance Table

Model: foodexp ~ income

Joint Test of Equality of Slopes: tau in { 0.5 0.75 }

Df Resid Df F value Pr(>F)

1 1 469 12.093 0.0005532 ***

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

plot(engel$foodexp , engel$income,pch=20, col = "#2E8B57",

main = "家庭收入与食品支出的分位数回归",xlab="食品支出",ylab="家庭收入")

lines(fitted(fit1), engel$income,lwd=2, col = "#EEEE00") lines(fitted(fit2), engel$income,lwd=2, col = "#EE6363")

legend("topright", c("tau=.5","tau=.75"), lty=c(1,1),

col=c("#EEEE00","#EE6363"))

fit = rq(foodexp ~ income, tau = c(0.05,0.25,0.5,0.75,0.95), data = engel)

plot( summary(fit))

### 多元分位数回归

data(barro)

fit1 <- rq(y.net ~ lgdp2 + fse2 + gedy2 + Iy2 + gcony2, data = barro,tau=.25)

fit2 <- rq(y.net ~ lgdp2 + fse2 + gedy2 + Iy2 + gcony2, data = barro,tau=.50)

fit3 <- rq(y.net ~ lgdp2 + fse2 + gedy2 + Iy2 + gcony2, data = barro,tau=.75)

# 替代方式 fit <- rq(y.net ~ lgdp2 + fse2 + gedy2 + Iy2 + gcony2, method = "fn", tau = 1:4/5, data = barro)

anova(fit1,fit2,fit3) # 不同分位点模型比较-方差分析

anova(fit1,fit2,fit3,joint=FALSE)



Quantile Regression Analysis of Deviance Table

Model: y.net ~ lgdp2 + fse2 + gedy2 + Iy2 + gcony2

Tests of Equality of Distinct Slopes: tau in { 0.25 0.5 0.75 }

Df Resid Df F value Pr(>F)

lgdp2 2 481 1.0656 0.34535

fse2 2 481 2.6398 0.07241 .

gedy2 2 481 0.7862 0.45614

Iy2 2 481 0.0447 0.95632

gcony2 2 481 0.0653 0.93675

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Warning message:

In summary.rq(x, se = se, covariance = TRUE) : 1 non-positive fis

plot(barro\$y.net,pch=20, col = "#2E8B57",

main = "不同分位点拟合曲线的比较")

lines(fitted(fit1),lwd=2, col = "#FF00FF")

lines(fitted(fit2),lwd=2, col = "#EEEE00")

lines(fitted(fit3),lwd=2, col = "#EE6363")

legend("topright", c("tau=.25","tau=.50","tau=.75"), lty=c(1,1),

col=c( "#FF00FF","#EEEE00","#EE6363"))

### 时间序列数据之动态线性分位数回归

library(zoo)

data("AirPassengers", package = "datasets")

ap <- log(AirPassengers)

fm <- dynrq(ap ~ trend(ap) + season(ap), tau = 1:4/5)



Dynamic quantile regression "matrix" data:

Start = 1949(1), End = 1960(12)

Call:

dynrq(formula = ap ~ trend(ap) + season(ap), tau = 1:4/5)

Coefficients:

tau= 0.2 tau= 0.4 tau= 0.6 tau= 0.8

(Intercept) 4.680165533 4.72442529 4.756389747 4.763636251

trend(ap) 0.122068032 0.11807467 0.120418846 0.122603451

season(ap)Feb -0.074408403 -0.02589716 -0.006661952 -0.013385535

season(ap)Mar 0.082349382 0.11526821 0.114939193 0.106390507

season(ap)Apr 0.062351869 0.07079315 0.063283042 0.066870808

season(ap)May 0.064763333 0.08453454 0.069344618 0.087566554

season(ap)Jun 0.195099116 0.19998275 0.194786890 0.192013960

season(ap)Jul 0.297796876 0.31034824 0.281698714 0.326054871

season(ap)Aug 0.287624540 0.30491687 0.290142727 0.275755490

season(ap)Sep 0.140938329 0.14399906 0.134373833 0.151793646

season(ap)Oct 0.002821207 0.01175582 0.013443965 0.002691383

season(ap)Nov -0.154101220 -0.12176290 -0.124004759 -0.136538575

season(ap)Dec -0.031548941 -0.01893221 -0.023048200 -0.019458814

Degrees of freedom: 144 total; 131 residual

sfm <- summary(fm)

plot(sfm)

fm1 <- dynrq(ap ~ trend(ap) + season(ap), tau = .25)

fm2 <- dynrq(ap ~ trend(ap) + season(ap), tau = .50)

fm3 <- dynrq(ap ~ trend(ap) + season(ap), tau = .75)

plot(ap,cex = .5,lwd=2, col = "#EE2C2C",main = "时间序列分位数回归")

lines(fitted(fm1),lwd=2, col = "#1874CD")

lines(fitted(fm2),lwd=2, col = "#00CD00")

lines(fitted(fm3),lwd=2, col = "#CD00CD")

legend("topright", c("原始拟合","tau=.25","tau=.50","tau=.75"), lty=c(1,1),

col=c( "#EE2C2C","#1874CD","#00CD00","#CD00CD"),cex = 0.65)