# # Inputs # ----------------- # Usage: Counts Data (tab) Weather Data (wtb) Averaging Interval, Minutes (td) # Defaults: ct0 wt0 30 # # An R script that averages counts data and weather data over time intervals. This is a more efficient # version of a previous script. The columns of the counts data must be of the form: # date, d1, d2, d3, d4, d5, c2, c3.1, c3.2, c3.3, c4.a, c5 # The columns of the weather data must be of the form: # date, TempF, DewpointF, PressIn, WindDir, WindDirDeg, WindSpMPH, # WindSpGustMPH, Humi, HourlyPrecip, dailyrain # The output has columns: # date, d1, d2, d3, d4, d5, c2, c3.1, c3.2, c3.3, c4.a, c5, TempF, DewpointF, # PressIn, WindDir, WindDirDeg, WindSpMPH, WindSpGustMPH, Humi, # HourlyPrecip, dailyrain # # Author: Greg Smith, 2008 Stony Brook University Summer Physics REU # Date: Jul 23, 2008 # mtbcomb8 <- function(tab=ct0, wtb=wt0, td=30){ #AA <- as.POSIXct(Sys.time()) texp <- 60 #seconds (expected) dt = td*60 # averaging interval in seconds # find the start of an hour t0 <- tab$date[1] t1 <- as.POSIXlt(t0) t1$min <- 0 t1$sec <- 0 t1 <- as.POSIXct(t1) # what interval (from the beginning of an hour) we should start with frac <- (as.numeric(t0)-as.numeric(t1))/dt nf <- round(frac) + 1 # last interval t2 <- tab$date[nrow(tab)] frac <- (as.numeric(t2)-as.numeric(t1))/dt nl <- round(frac) tab1 <- data.matrix( tab[, 2:ncol(tab)] ) dvec <- tab$date wtb1 <- data.matrix( wtb[, 2:ncol(wtb)] ) wdvec <- wtb$date dvec1 <- tab$date[FALSE] tba1 <- tab1[FALSE,] wbt1 <- wtb1[FALSE,] #initialize the index numbers ndxA <- 1 ndxB <- ndxA + 1.1 * td wndxA <- NROW( wtb$date[ dvec[ndxA] >= wtb$date ] ) wndxB <- wndxA + td if (nf<=nl) { k<-1 #k is the index for dvec1[k] for (i in nf:nl) { # i is the interval index #b1,b2 are start and end times of interval b1 = t1 + (i-1)*dt b2 = b1 + dt #determine the index boundaries for counts if( (ndxA <= length(dvec)) & (ndxB <= length(dvec)) ){ dveci <- dvec[ndxA:ndxB] tba1i <- tab1[ndxA:ndxB,] }else{ if( (ndxA > length(dvec)) ){ dveci <- dvec[(length(dvec)-1.1*td):length(dvec)] tba1i <- tab1[(length(dvec)-1.1*td):length(dvec),] }else{ dveci <- dvec[ndxA:length(dvec)] tba1i <- tab1[ndxA:length(dvec),] } } #determine the index boundaries for weather if( (wndxA <= length(wdvec)) & (wndxB <= length(wdvec)) ){ wdveci <- wdvec[wndxA:wndxB] wbt1i <- wtb1[wndxA:wndxB,] }else{ if( (wndxA > length(wdvec)) ){ wdveci <- wdvec[(wndxA-td):length(wdvec)] wbt1i <- wtb1[(wndxA-td):length(wdvec)] }else{ wdveci <- wdvec[wndxA:length(wdvec)] wbt1i <- wtb1[wndxA:length(wdvec)] } } #restrict the time vector using the time boundaries dvecj <- dveci[ (b1 <= dveci)&(dveci < b2) ] tba1j <- tba1i[ (b1 <= dveci)&(dveci < b2), ] #restrict the wtime vector using the time boundaries wdvecj <- wdveci[ (b1 <= wdveci)&(wdveci < b2) ] wbt1j <- wbt1i[ (b1 <= wdveci)&(wdveci < b2), ] if( length(dvecj) == 0 ){ tmp <- dvec[dvec < b1] ndxA <- length(tmp) + 1.1*td rm(tmp) ndxB <- ndxA + 1.1 * td wndxA <- wndxA + length(wdvecj) wndxB <- wndxA + td } if( length(dvecj) == 1 ){ tba1 <- rbind(tba1, tba1j) dvec1[k] <- (b1 + dt/2) k <- (k+1) if( length(wdvecj) == 0){ wbt1 <- rbind(wbt1, rep(NA, ncol(wbt1) ) ) } if( length(wdvecj) == 1){ wbt1 <- rbind(wbt1, wbt1j) } if( length(wdvecj) > 1 ){ wbt1 <- rbind(wbt1, apply(wbt1j,2,mean, na.rm=TRUE) ) } #advance the index ndxA <- ndxA + length(dvecj) ndxB <- ndxA + 1.1 * td wndxA <- wndxA + length(wdvecj) wndxB <- wndxA + 1.1 * td } if( length(dvecj) > 1 ){ if( length(wdvecj) == 0 ){ wbt1 <- rbind(wbt1, rep(NA, ncol(wbt1) ) ) } if( length(wdvecj) == 1){ wbt1 <- rbind(wbt1, wbt1j) } if( length(wdvecj) > 1){ wbt1 <- rbind(wbt1, apply(wbt1j,2,mean, na.rm=TRUE) ) } tba1 <- rbind(tba1, apply(tba1j, 2, mean, na.rm=TRUE)) dvec1[k] <- (b1 + dt/2) k <- (k+1) #advance the index ndxA <- ndxA + length(dvecj) ndxB <- ndxA + 1.1 * td wndxA <- wndxA + length(wdvecj) wndxB <- wndxA + 1.1 * td } rm(wdveci, dveci, wdvecj, dvecj, tba1i, tba1j) } } attr(dvec1, "tzone") <- "GMT" tbn <- data.frame(dvec1, tba1, wbt1) attr(tbn,"names") <- c(attr(tab,"names"), attr(wtb[,2:ncol(wtb)], "names")) #BB <- as.POSIXct(Sys.time()) #CC <- difftime(BB, AA) #print(CC) return(tbn) }