XLConnect:一个用R处理Excel文件的高效平台

来源:转载

XLConnect:一个用R处理Excel文件的高效平台

read.table(),read.csv(),read.delim()等函数可以直接读取EXCEl文件,但或多或少总会遇到一些问题。XLConnect函数包,是一个可以用R处理Excel文件的高效平台。利用它可以读取或创建一个XLSX文件,并对文件进行数据处理,对文本内数据进行标记,以及可视化。

创建读取xlsl文件

require("XLConnect")

## Loading required package: XLConnect

## Loading required package: XLConnectJars

## XLConnect 0.2-11 by Mirai Solutions GmbH [aut],

## Martin Studer [cre],

## The Apache Software Foundation [ctb, cph] (Apache POI, Apache Commons

## Codec),

## Stephen Colebourne [ctb, cph] (Joda-Time Java library)

## http://www.mirai-solutions.com ,

## http://miraisolutions.wordpress.com

# 读取或创建一个XLSX文件,此步相当于建立一个连接

xls <- loadWorkbook('C:/Users/ShangFR/Desktop/test.xlsx',create=TRUE)

# 创建工作表

createSheet(xls,name='namesheet')

# 写入数据

writeWorksheet(xls,iris,'namesheet',

startRow=5,startCol=5, # 数据出现的左上角位置

header=TRUE)

# 存入硬盘,直到此步方才有文档生成

saveWorkbook(xls)

# 上面四个步骤是新建文档、新建工作表、写入数据、最后存盘。如果要写入数据的同时创建好区域名称,则在第三步有所不同。

# 创建区域名

createName(xls,name='nameregion',

formula='namesheet!$C$5', #区域的左上角单元格位置

overwrite=TRUE)

# 写入数据

writeNamedRegion(xls,iris,name='nameregion')

# 读取文档则简单的多

data <- readWorksheet(xls, 'namesheet',

startRow=1, startCol=1,

endRow=0,endCol=0, #取0表示自动判断

header=TRUE)

文件内数据标记、处理和可视化

一、创建汇率excel

#一、创建汇率excel

require(XLConnect)

require(zoo)

## Loading required package: zoo

##

## Attaching package: 'zoo'

##

## The following objects are masked from 'package:base':

##

## as.Date, as.Date.numeric

require(ggplot2) # >= 0.9.3

## Loading required package: ggplot2

curr = XLConnect::swissfranc

curr = curr[order(curr$Date),]

wbFilename = "swiss_franc.xlsx"

wb = loadWorkbook(wbFilename, create = TRUE)

# Create a new sheet named 'Swiss_Franc'

sheet = "Swiss_Franc"

createSheet(wb, name = sheet)

# Create a new Excel name referring to the top left corner

# of the sheet 'Swiss_Franc' - this name is going to hold

# our currency data

dataName = "currency"

nameLocation = paste(sheet, "$A$1", sep = "!")

createName(wb, name = dataName, formula = nameLocation)

# Instruct XLConnect to only apply a data format for a cell

# but not to apply any other cell styling

setStyleAction(wb, XLC$"STYLE_ACTION.DATA_FORMAT_ONLY")

# Set the default format for numeric data to display

# four digits after the decimal point

setDataFormatForType(wb, type = XLC$"DATA_TYPE.NUMERIC", format = "0.0000")

# Write the currency data to the named region created above

# Note: the named region will be automatically redefined to encompass all

# written data

writeNamedRegion(wb, data = curr, name = dataName, header = TRUE)

# Save the workbook (this actually writes the file to disk)

saveWorkbook(wb)

#二、颜色标记-特殊值

# Load the workbook created above

wb = loadWorkbook(wbFilename)

# Create a cell style for the header row

csHeader = createCellStyle(wb, name = "header")

setFillPattern(csHeader, fill = XLC$FILL.SOLID_FOREGROUND)

setFillForegroundColor(csHeader, color = XLC$COLOR.GREY_25_PERCENT)

# Create a date cell style with a custom format for the Date column

csDate = createCellStyle(wb, name = "date")

setDataFormat(csDate, format = "yyyy-mm-dd")

# Create a highlighting cell style

csHlight = createCellStyle(wb, name = "highlight")

setFillPattern(csHlight, fill = XLC$FILL.SOLID_FOREGROUND)

setFillForegroundColor(csHlight, color = XLC$COLOR.CORNFLOWER_BLUE)

# Apply header cell style to the header row

setCellStyle(wb, sheet = sheet, row = 1,

col = seq(length.out = ncol(curr)),

cellstyle = csHeader)

# Index for all rows except header row

allRows = seq(length = nrow(curr)) + 1

# Apply date cell style to the Date column

setCellStyle(wb, sheet = sheet, row = allRows, col = 1,cellstyle = csDate)

# Set column width such that the full date column is visible

setColumnWidth(wb, sheet = sheet, column = 1, width = 2800)

# Check if there was a change of more than 2% compared

# to the previous day (per currency)

idx = rollapply(curr[, -1], width = 2,

FUN = function(x) abs(x[2] / x[1] - 1),

by.column = TRUE) > 0.02

idx = rbind(rep(FALSE, ncol(idx)), idx)

widx = lapply(as.data.frame(idx), which)

# Apply highlighting cell style

for(i in seq(along = widx)) {

if(length(widx[[i]]) > 0) {

setCellStyle(wb, sheet = sheet, row = widx[[i]] + 1, col = i + 1,cellstyle = csHlight)

}

}

saveWorkbook(wb)

#三、添加汇率趋势图

wb = loadWorkbook(wbFilename)

# Stack currencies into a currency variable (for use with ggplot2 below)

currencies = names(curr)[-1]

gcurr = reshape(curr, varying = currencies, direction = "long",

v.names = "Value", times = currencies, timevar = "Currency")

# Create a png graph showing the currencies in the context

# of the Swiss Franc

png(filename = "swiss_franc.png", width = 800, height = 600)

p = ggplot(gcurr, aes(Date, Value, colour = Currency)) +

geom_line() + stat_smooth(method = "loess") +

scale_y_continuous("Exchange Rate CHF/CUR") +

labs(title = paste0("CHF vs ", paste(currencies, collapse = ", ")),

x = "") +

theme(axis.title.y = element_text(size = 10, angle = 90, vjust = 0.3))

print(p)

dev.off()

## png

## 2

 p

 # Define where the image should be placed via a named region;

# let's put the image two columns left to the data starting

# in the 5th row

createName(wb, name = "graph",

formula = paste(sheet, idx2cref(c(5, ncol(curr) + 2)), sep = "!"))

# Put the image created above at the corresponding location

addImage(wb, filename = "swiss_franc.png", name = "graph",

originalSize = TRUE)

saveWorkbook(wb)

XLConnect的帮助文档内有详细介绍,感兴趣的可直接参考。

反馈与建议

  • 作者:ShangFR
  • 邮箱:shangfr@foxmail.com

分享给朋友:
您可能感兴趣的文章:
随机阅读: