在 for 循环中将数据帧附加在一起

小开

You should try this:

# Start from an empty data frame and append one iteration's result per pass.
df_total = data.frame()
for (i in 1:7){
# vector output
model <- #some processing (placeholder: replace with the code that produces the vector)


# wrap the vector in a data frame, then append it as new rows
df <- data.frame(model)
df_total <- rbind(df_total,df)
}

小开

Again, maRtin is correct, but for this to work you have to start with a data frame that already has at least one column.

# Seed the result with a first column so that cbind() has something to extend.
model <- #some processing (placeholder)
df <- data.frame(col1=model)


for (i in 2:17)
{
model <- # some processing (placeholder)
nextcol <-  data.frame(model)
colnames(nextcol) <- c(paste("col", i, sep="")) # rename the column to "col<i>"
df <- cbind(df, nextcol)
}

小开

最佳答案

Don't do it inside the loop. Make a list, then combine them outside the loop.

n <- 5
datalist <- list()
# or pre-allocate for slightly more efficiency
datalist <- vector("list", length = n)


# seq_len(n) is empty when n is 0, whereas 1:n would iterate over c(1, 0)
for (i in seq_len(n)) {
  # ... make some data
  dat <- data.frame(x = rnorm(10), y = runif(10))
  dat$i <- i  # maybe you want to keep track of which iteration produced it?
  datalist[[i]] <- dat # add it to your list
}


# Combine all pieces in a single call once the loop has finished
big_data <- do.call(rbind, datalist)
# or big_data <- dplyr::bind_rows(datalist)
# or big_data <- data.table::rbindlist(datalist)

This is a much more R-like way to do things. It can also be substantially faster, especially if you use dplyr::bind_rows or data.table::rbindlist for the final combining of data frames.

小开

In the Coursera course An Introduction to R Programming, this skill was tested. Students were given 332 separate CSV files and asked to programmatically combine several of them to calculate the mean value of a pollutant.

This was my solution:

  # create your empty dataframe so you can append to it.
combined_df <- data.frame(
  Date = as.Date(character()),
  Sulfate = double(),
  Nitrate = double(),
  ID = integer()
)
# Build every file path up front; sprintf() adds the leading zeros used in the
# file names (e.g. 1 -> "001.csv").
monitor_ids <- min(id):max(id)
csv_paths <- file.path(getwd(), directory, sprintf("%03d.csv", monitor_ids))
# Read all files into a list, then bind once instead of calling rbind() on a
# growing data frame inside a loop.
monitor_data <- lapply(csv_paths, read.csv)
combined_df <- do.call(rbind, c(list(combined_df), monitor_data))

小开

Prefer the rbindlist approach over rbind, as it is much faster.

Example:

library(data.table)


##### example 1: slow processing ######


# Start from a zero-row data frame. Seeding with data.frame(x = NA, y = NA)
# would leave a spurious all-NA first row in the result.
table.1 <- data.frame(x = numeric(0), y = numeric(0))
time.taken <- 0
for (i in seq_len(100)) {
  start.time <- Sys.time()
  x <- rnorm(100)
  y <- x / 2 + x / 3
  z <- cbind.data.frame(x = x, y = y)

  # Deliberately slow: table.1 is copied and regrown on every iteration
  table.1 <- rbind(table.1, z)
  end.time <- Sys.time()
  # Accumulate only the time spent inside the loop body
  time.taken <- (end.time - start.time) + time.taken
}
print(time.taken)
> Time difference of 0.1637917 secs


####example 2: faster processing #####


# Collect each chunk in a pre-allocated list and bind once at the end
table.2 <- vector("list", 100)
t0 <- 0
for (i in seq_len(100)) {
  s0 <- Sys.time()
  x <- rnorm(100)
  y <- x / 2 + x / 3

  z <- cbind.data.frame(x = x, y = y)

  table.2[[i]] <- z

  e0 <- Sys.time()
  t0 <- (e0 - s0) + t0
}
# Time the single combining step separately and add it to the loop time
s1 <- Sys.time()
table.3 <- rbindlist(table.2)
e1 <- Sys.time()


t1 <- (e1 - s1) + t0
t1
> Time difference of 0.03064394 secs

小开

Here are some tidyverse and custom function options that might work depending on your needs:

library(tidyverse)


# custom function to generate, filter, and mutate the data:
#' Generate one iteration's data, keep rows where x < y, and tag them
#'
#' @param i Iteration identifier stored in column `i` of the result.
#' @return A tibble with columns `x`, `y`, `x_plus_y` and `i`; it has between
#'   0 and 5 rows depending on the random draw.
combine_dfs <- function(i) {
  # tibble() replaces data_frame(), which is deprecated
  tibble(x = rnorm(5), y = runif(5)) %>%
    filter(x < y) %>%
    mutate(x_plus_y = x + y, i = i)
}


# Call combine_dfs() for i = 1..5 and row-bind the results into one data frame
df <- 1:5 %>% map_df(~combine_dfs(.))
df <- map_df(1:5, ~combine_dfs(.)) # same call without the pipe; values differ between runs because the data are random
> df %>% head()
# A tibble: 6 x 4
x      y x_plus_y     i
<dbl>  <dbl>    <dbl> <int>
1 -0.973 0.673    -0.300     1
2 -0.553 0.0463   -0.507     1
3  0.250 0.716     0.967     2
4 -0.745 0.0640   -0.681     2
5 -0.736 0.228    -0.508     2
6 -0.365 0.496     0.131     3

You could do something similar if you had a directory of files that needed to be combined:

# Directory holding the input files (the trailing slash matters because paths are built with paste0())
dir_path <- '/path/to/data/test_directory/'
list.files(dir_path) # list the directory contents


#' Read one CSV file, keep rows where a < b, and add derived columns
#'
#' @param path Directory prefix; concatenated directly with `file`.
#' @param file File name; also recorded in the `file_name` column.
#' @return The filtered data with added `a_plus_b` and `file_name` columns.
combine_files <- function(path, file) {
  full_path <- paste0(path, file)
  raw <- read_csv(full_path)
  kept <- filter(raw, a < b)
  mutate(kept, a_plus_b = a + b, file_name = file)
}


# Apply combine_files() to every file name ending in ".csv" and bind the results
df <- list.files(dir_path, '\\.csv$') %>%
map_df(~combine_files(dir_path, .))


# or if you have Excel files, using the readxl package:
#' Read one .xlsx file, keep rows where a < b, and add derived columns
#'
#' @param path Directory prefix; concatenated directly with `file`.
#' @param file File name; also recorded in the `file_name` column.
#' @return The filtered data with added `a_plus_b` and `file_name` columns.
combine_xl_files <- function(path, file) {
  full_path <- paste0(path, file)
  raw <- readxl::read_xlsx(full_path)
  kept <- filter(raw, a < b)
  mutate(kept, a_plus_b = a + b, file_name = file)
}


# Apply combine_xl_files() to every file name ending in ".xlsx" and bind the results
df <- list.files(dir_path, '\\.xlsx$') %>%
map_df(~combine_xl_files(dir_path, .))

小开

x <- 1:10


# build one single-row data frame per value ----


# The original used `x1 == paste0(...)`, a comparison against an undefined
# object; the values are now assigned to named columns directly.
rows <- vector("list", length(x))
for (i in seq_along(x)) {
  rows[[i]] <- data.frame(
    st_column = paste0("The number is ", x[i]),
    nd_column = paste0("This is another number ", x[i])
  )
}


# combine once, after the loop ----
df <- do.call(rbind, rows)

# View() needs an interactive session
if (interactive()) {
  View(df)
}

that might be a good way to do so....

小开

For me, it worked very simply. At first, I made an empty data.frame, then in each iteration I added one column to it. Here is my code:

# modelForOneIteration is not defined in this snippet; presumably it holds the
# result of a single iteration -- confirm before use.
df <- data.frame(modelForOneIteration)
for(i in 1:10){
model <- # some processing (placeholder)
df[,i] = model # assign the result to column i
}

小开

"""Produce Multiple DataFrames from the unique TFs via a Groupby"""

# NOTE(review): the original snippet lost its indentation; the nesting below
# is the most plausible reading -- confirm against the intended logic.
dfs_list = []
for i in range(len(df_CDL)):
    df = df_CDL[i]
    print(df, 'Only 1 df_CDL')

    # One entry per distinct value of the 'TFs' column
    dfs = []
    for _, dataframe in df.groupby('TFs'):
        print('What is going on here?15', dataframe)
        dfs.append([dataframe])
    dfs_list.append([dfs])

# Index any dataframe you want or loop through them all.
print('Test?10', dfs[1], 'Test?20')
# The string literal was split across two lines, which is a syntax error;
# it is rejoined here.
print('What is going on here? 1', dfs_list[5],
      'What is going on here again? 2')