1 | initial version |
To use the summarize_all function to retrieve the indexes and dates of the first and last non-NA values and format the output accordingly, we can use the following code:
library(dplyr)
# Example data: one date column plus two numeric columns, each with NA values
# at different positions (A is missing at both ends, B in the middle).
df <- local({
  day <- as.Date(c("2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04"))
  col_a <- c(NA, 2, 3, NA)
  col_b <- c(1, NA, 3, 4)
  data.frame(date = day, A = col_a, B = col_b)
})
# Locate the first non-NA element of a vector.
#
# Args:
#   x: A vector that may contain NA values.
#   dates: Optional vector of labels (for example dates) parallel to `x`.
#     Defaults to `names(x)`, which is NULL for an unnamed vector.
#
# Returns:
#   A list with `index` (integer position of the first non-NA element) and
#   `date` (the label at that position). `index` and `date` are both NA when
#   `x` has no non-NA element; `date` is NA when no labels are available.
first_non_na <- function(x, dates = names(x)) {
  # useNames = FALSE keeps the returned index free of element names.
  hits <- which(!is.na(x), useNames = FALSE)
  # min() on an empty vector warns and yields Inf, which is.na() does not
  # catch, so "nothing found" is detected through the length instead.
  if (length(hits) == 0L) {
    return(list(index = NA_integer_, date = NA))
  }
  index <- hits[1L] # which() returns positions in increasing order
  date <- if (is.null(dates)) NA else dates[index]
  list(index = index, date = date)
}
# Locate the last non-NA element of a vector.
#
# Args:
#   x: A vector that may contain NA values.
#   dates: Optional vector of labels (for example dates) parallel to `x`.
#     Defaults to `names(x)`, which is NULL for an unnamed vector.
#
# Returns:
#   A list with `index` (integer position of the last non-NA element) and
#   `date` (the label at that position). `index` and `date` are both NA when
#   `x` has no non-NA element; `date` is NA when no labels are available.
last_non_na <- function(x, dates = names(x)) {
  # useNames = FALSE keeps the returned index free of element names.
  hits <- which(!is.na(x), useNames = FALSE)
  # max() on an empty vector warns and yields -Inf, which is.na() does not
  # catch, so "nothing found" is detected through the length instead.
  if (length(hits) == 0L) {
    return(list(index = NA_integer_, date = NA))
  }
  index <- hits[length(hits)] # which() returns positions in increasing order
  date <- if (is.null(dates)) NA else dates[index]
  list(index = index, date = date)
}
# Apply both helpers to every value column. across() is the current
# replacement for the superseded summarize_all(); the date column is excluded
# so that it is not summarised itself, and each helper result is wrapped in
# list() so that it fits into one list-column cell of the one-row summary.
result <- df %>%
  summarise(across(
    -date,
    list(
      first_non_na = ~ list(first_non_na(.x)),
      last_non_na = ~ list(last_non_na(.x))
    )
  ))

# Format one line per value column. The dates are looked up in df$date by
# index because the column vectors handed to the helpers carry no dates.
value_cols <- setdiff(names(df), "date")
output <- vapply(value_cols, function(col) {
  first <- result[[paste0(col, "_first_non_na")]][[1]]
  last <- result[[paste0(col, "_last_non_na")]][[1]]
  paste(
    "First non-NA value:", format(df$date[first$index]),
    "at index", first$index, ";",
    "Last non-NA value:", format(df$date[last$index]),
    "at index", last$index
  )
}, character(1), USE.NAMES = FALSE)

# Print the output, one line per value column.
cat(paste(output, collapse = "\n"))
The output will be:
First non-NA value: 2021-01-02 at index 2 ; Last non-NA value: 2021-01-03 at index 3
First non-NA value: 2021-01-01 at index 1 ; Last non-NA value: 2021-01-04 at index 4