Process Excel Files in R Program
Overview
If you want to import data from excel files in R program,
you have many options :
- Use R packages readxl/writexl with read_excel()/write_xlsx() functions.
- Use R package rio with import() and export() functions.
- The rio package uses same simple API calls import() and export() for
dealing with many different formats like csv, xls, xlsx, etc. - In my experience, I wanted lower level control over import/export
so I always end up using readxl package. - For data cleanup, you may want to use R stringr package for string manipuations.
Code Snippets
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
library(readxl) # read_excel(path, sheet = NULL, range = NULL, col_names = TRUE, # col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, # guess_max = min(1000, n_max)) # read_xls(...) # read_xlsx(...) library(writexl) x = list(mtcars = mtcars, iris = iris) # You have 2 dataframes -- Write into 2 sheets of an excel file. write_xlsx(x, path = "writexlsx-cars-iris.xlsx", col_names = TRUE) # Reading single sheets separately. irisdf = read_excel("writexlsx-iris.xlsx", sheet = 2) carsdf = read_excel("writexlsx-cars.x lsx", sheet = 1) # Just read the excel sheet names ... Not the data. sheets = ( excel_sheets( 'mydata.xlsx' ) ) # vector of names. total_sheets = length(sheets) # Limit the number of data rows read. Reads only Sheet 1 by default. df = read_excel('mydata.xlsx', n_max = 3) # Read from an Excel range using A1 or R1C1 notation df = read_excel('mydata.xlsx', range = "C1:E7") df = read_excel('mydata.xlsx', range = "R1C2:R2C5") # Specify the sheet as part of the range df = read_excel('mydata.xlsx', range = "mtcars!B1:D5") # Read only specific rows or columns read_excel('mydata.xlsx', range = cell_rows(102:151), col_names = FALSE) read_excel('mydata.xlsx', range = cell_cols("B:D")) # Read all sheets from excel sheet. # There is no API like read_excel_all_sheets() - # So we have to do something like below... sheet_names = excel_sheets(path) #> [1] "iris" "mtcars" "chickwts" "quakes" path <- readxl_example("datasets.xls") # [1] "/home/user/R/x86_64-pc-linux-gnu-library/3.5/readxl/extdata/datasets.xlsx" df_list = lapply(sheet_names, read_excel, path = path) |