Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

To use the summarize_all function to retrieve the indexes and dates of the first and last non-NA values and format the output accordingly, we can use the following code:

library(dplyr)

# Example data: four consecutive days with two value columns, each of which
# contains missing observations.
df <- data.frame(
  date = as.Date(sprintf("2021-01-%02d", 1:4)),
  A    = c(NA, 2, 3, NA),
  B    = c(1, NA, 3, 4)
)

# Locate the first non-NA element of a vector.
#
# Arguments:
#   x      A vector that may contain NA values.
#   dates  Optional vector, parallel to `x`, holding the label (e.g. a date)
#          of each position. When NULL, `names(x)` is used instead.
#
# Returns a list with two elements:
#   index  Integer position of the first non-NA element, or NA_integer_ when
#          `x` is empty or entirely NA.
#   date   The label at that position, or NA when there is no such position
#          or no labels are available.
first_non_na <- function(x, dates = NULL) {
  keys <- if (is.null(dates)) names(x) else dates
  hits <- which(!is.na(x))

  # min() of an empty vector is Inf (with a warning), never NA, so the
  # "nothing found" case has to be detected through the length of `hits`.
  if (length(hits) == 0L) {
    return(list(index = NA_integer_, date = NA))
  }

  ind <- hits[[1L]]  # which() returns ascending positions
  list(index = ind, date = if (is.null(keys)) NA else keys[ind])
}

# Locate the last non-NA element of a vector.
#
# Arguments:
#   x      A vector that may contain NA values.
#   dates  Optional vector, parallel to `x`, holding the label (e.g. a date)
#          of each position. When NULL, `names(x)` is used instead.
#
# Returns a list with two elements:
#   index  Integer position of the last non-NA element, or NA_integer_ when
#          `x` is empty or entirely NA.
#   date   The label at that position, or NA when there is no such position
#          or no labels are available.
last_non_na <- function(x, dates = NULL) {
  keys <- if (is.null(dates)) names(x) else dates
  hits <- which(!is.na(x))

  # max() of an empty vector is -Inf (with a warning), never NA, so the
  # "nothing found" case has to be detected through the length of `hits`.
  if (length(hits) == 0L) {
    return(list(index = NA_integer_, date = NA))
  }

  ind <- hits[[length(hits)]]  # which() returns ascending positions
  list(index = ind, date = if (is.null(keys)) NA else keys[ind])
}

# apply the custom functions to each value column using summarize_all.
# The date column is dropped first so it is not summarised itself, and only
# the scalar index is kept from each helper so that every summary cell has
# length one.
values <- df %>% select(-date)
result <- list(
  first = values %>% summarize_all(function(x) first_non_na(x)$index),
  last  = values %>% summarize_all(function(x) last_non_na(x)$index)
)

# format the output: one line per value column. Dates are looked up in
# df$date by index because plain data frame columns carry no names.
output <- vapply(names(values), function(col) {
  i <- result$first[[col]]
  j <- result$last[[col]]
  paste("First non-NA value:", format(df$date[i]), "at index", i, ";",
        "Last non-NA value:", format(df$date[j]), "at index", j)
}, character(1))

# print the output
cat(paste(output, collapse = "\n"))

The output will be:

First non-NA value: 2021-01-02 at index 2 ; Last non-NA value: 2021-01-03 at index 3
First non-NA value: 2021-01-01 at index 1 ; Last non-NA value: 2021-01-04 at index 4