From 5b9fa7c15d4bbc2a8b2cbe61805dadde8a042b62 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Sun, 28 Jan 2024 21:31:22 +0100 Subject: [PATCH 1/9] add new "simulate GC" info file, and add new IF statements for filter/correction * add new option in main_script.R * use the PGA data to simulate GC data, and run only specific filter/correction functions * simulateGCdata <- "F" # [F,T] * add new additional_info/simulate_gc_n2o_data.txt containing, for now: * list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods' * simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12) * add new IF statements to 'f_correct_n2o_conc_sorted_by_measurementperiods' to run only selected functions if simulateGCdata == "T" * tested: works --- .../additional_info/simulate_gc_n2o_data.txt | 3 + .../correct_filter_n2o_conc_by_meas_period.R | 125 +++++++++++------- preprocessing/main_script.R | 5 +- 3 files changed, 85 insertions(+), 48 deletions(-) create mode 100644 preprocessing/additional_info/simulate_gc_n2o_data.txt diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt new file mode 100644 index 0000000..e04cf8e --- /dev/null +++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt @@ -0,0 +1,3 @@ +# list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods' +simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12) + diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R index 3227f3e..b407981 100644 --- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R +++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R @@ -1,20 +1,27 @@ # main filter script for correcting GHG concentrations before flux calculation f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF_mID, - measPeriodN2o_mID, - measID, - mainDir, - logfile_n2o_DataCorrection, - username){ + measPeriodN2o_mID, + measID, + mainDir, + logfile_n2o_DataCorrection, + username, + simulateGCdata){ print(paste0(">> start N2O data filter/correction for mp: ", measID)) + if (simulateGCdata == "T"){ + print(">> simulateGCdata == TRUE, running only specific filter/correction functions!") + } # sequence row numbers of measPeriodN2o_mID to 1:x rownames(measPeriodN2o_mID) <- seq(1:nrow(measPeriodN2o_mID)) - # load parameters needed for correcting and filtering concentrations + # load parameters needed for correcting and filtering concentrations source("additional_info/correction_filter_n2o.txt") + # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions') + source("additional_info/simulate_gc_n2o_data.txt") + # absolute path to logfile logfile_n2o_DataCorrection <- paste0(warningDir, "/", logfile_n2o_DataCorrection) @@ -28,32 +35,40 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF # 01 test for min and max length of measurement period # if thresholds are exceeded: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodMetaDataDF_mID <- data_filter_function_01(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minLengthMPSec, maxLengthMPSec) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 1 %in% simGCfilterCorrFunctions)){ + measPeriodMetaDataDF_mID <- data_filter_function_01(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minLengthMPSec, maxLengthMPSec) + } } # 02 remove data points (DF rows) within specific time-interval at start and end of measurement period if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_02(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - startRemoveDataSec, endRemoveDataSec) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 2 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_02(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + startRemoveDataSec, endRemoveDataSec) + } } # 03 test if N2O measurements always have the same concentration within threshold (e.g. (instrument mistake)) # if yes: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodMetaDataDF_mID <- data_filter_function_03(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - maxDiffFlatLine_mp) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 3 %in% simGCfilterCorrFunctions)){ + measPeriodMetaDataDF_mID <- data_filter_function_03(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + maxDiffFlatLine_mp) + } } # 04 test if PAR changes abruptly within specific time period # if yes: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodMetaDataDF_mID <- data_filter_function_04(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - maxPARChange, changeWithinTimeSec) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 4 %in% simGCfilterCorrFunctions)){ + measPeriodMetaDataDF_mID <- data_filter_function_04(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + maxPARChange, changeWithinTimeSec) + } } # 05 detect short N2O flatlines @@ -62,69 +77,85 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF # by default do not remove these measurements, and interpolate them with function 06 # if number of measurements exceeds maxProportionFlatLines: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - maxDiffFlatLineFromMedian_short, minFlatLineLengthSec, - removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines) - measPeriodMetaDataDF_mID <- as.data.frame(list_hlp[[1]]) - measPeriodN2o_mID <- as.data.frame(list_hlp[[2]]) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){ + list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + maxDiffFlatLineFromMedian_short, minFlatLineLengthSec, + removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines) + measPeriodMetaDataDF_mID <- as.data.frame(list_hlp[[1]]) + measPeriodN2o_mID <- as.data.frame(list_hlp[[2]]) + } } # 06 linear interpolation of short N2O flatlines # only if these short flatlines were not removed with function 05 ('removeFlatLines_short == F') if (removeFlatLines_short == "F" & measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_06(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - constantToReplaceFlatLines_short, timePeriodAroundFlatlinesSec, - minDistanceBetweenFlatlinesSec) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_06(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + constantToReplaceFlatLines_short, timePeriodAroundFlatlinesSec, + minDistanceBetweenFlatlinesSec) + } } # 07 correct / interpolation N2O values beyond reasonable values if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_07(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValueN2O, maxValueN2O, - timePeriodAroundOutliersSec) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 7 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_07(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValueN2O, maxValueN2O, + timePeriodAroundOutliersSec) + } } # 08 set unreasonable values of soil temperature to NA # 4 columns from 4 sensors # column names must contain "soilT" if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_08(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValueTSoilDegC, maxValueTSoilDegC) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 8 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_08(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValueTSoilDegC, maxValueTSoilDegC) + } } # 09 set unreasonable values of "VWC soil 12 cm" to NA (volumetric water content at 12 cm depth) if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_09(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValueVWCsoil12, maxValueVWCsoil12) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 9 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_09(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValueVWCsoil12, maxValueVWCsoil12) + } } # 10 set unreasonable values of "VWC soil 30 cm" to NA (volumetric water content at 30 cm depth) # all values would be 0 in case the sensor-device could not be inserted into the soil because of frozen soil # -> add to logfile as "frozen soil [F,T]" if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_10(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValueVWCsoil30, maxValueVWCsoil30) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 10 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_10(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValueVWCsoil30, maxValueVWCsoil30) + } } # 11 set PAR negative values to zero, and unreasonably high values to NA if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodN2o_mID <- data_filter_function_11(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValuePAR, maxValuePAR) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 11 %in% simGCfilterCorrFunctions)){ + measPeriodN2o_mID <- data_filter_function_11(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValuePAR, maxValuePAR) + } } # 12 test for negative air temperatures in the chamber ('chamberTdegC') # if yes: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - measPeriodMetaDataDF_mID <- data_filter_function_12(measPeriodMetaDataDF_mID, measPeriodN2o_mID, - measID, logfile_n2o_DataCorrection, - minValueTairChamberDegC) + if (simulateGCdata == "F" | (simulateGCdata == "T" & 12 %in% simGCfilterCorrFunctions)){ + measPeriodMetaDataDF_mID <- data_filter_function_12(measPeriodMetaDataDF_mID, measPeriodN2o_mID, + measID, logfile_n2o_DataCorrection, + minValueTairChamberDegC) + } } return(list(measPeriodMetaDataDF_mID, measPeriodN2o_mID)) diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index 013d266..7445e56 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -40,6 +40,8 @@ if (measCamp == "202307") process_aeris_raw_data <- "T" ## read raw data from # plot figures [T,F] create_plots_ghg_conc <- "F" +# use the PGA data to simulate GC data, and run only specific filter/correction functions +simulateGCdata <- "F" # [F,T] # =============================================================================================== # @@ -213,7 +215,8 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ measID, mainDir, logfile_n2o_DataCorrection, - username) + username, + simulateGCdata) # extract vector and data.frame from list measPeriodMetaDataDF[row,] <- as.data.frame(list_hlp[[1]]) measPeriodN2oCorr <- rbind(measPeriodN2oCorr, as.data.frame(list_hlp[[2]])) -- GitLab From ae5c7c3b0abc7f180b1f93a8f5cfdc6c43221b33 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Sun, 28 Jan 2024 22:28:58 +0100 Subject: [PATCH 2/9] add parameters to simGC info file and prepare simGC sampling * add new section 6 in main_script.R * this will call the simGC sampling function * modify section 7 to use either the GC or PGA DF * tested: works with: simulateGCdata <- "F" --- .../additional_info/simulate_gc_n2o_data.txt | 11 +++- preprocessing/main_script.R | 51 +++++++++++++++---- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt index e04cf8e..8fe486c 100644 --- a/preprocessing/additional_info/simulate_gc_n2o_data.txt +++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt @@ -1,3 +1,12 @@ # list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods' -simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12) +simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12) +# min length of the mp to be used for "GC simulation" +simGCminLengthMPSec <- 560 # [seconds] NOTE the 'data_filter_function_02' might remove some datapoints of each mp + +# time in seconds the GC samples are taken (per mp; from mp start) +# this also defines the number of samples +simGCsampleTimeSec <- c(100, 200, 300, 400, 500) # time [s] after mp start for taking samples + +# time in seconds before and after a sampling point to calculate (median) sampling value +simGCtimespanBeforeAfterSampleSec <- 5 # timespan before and after a sample used for calc of sample value diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index 7445e56..f485973 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -258,9 +258,42 @@ if (process_n2o == "T"){ ######################################################################################### ############### 6 create DF with only valid measurement periods ######################### +if (simulateGCdata == "T"){ + print("") + print(">> sample data for GC simulation") + + # TODO implement: + # * add a new column to the measPeriodN2o DF: + # * values [0,1] + # * mark all data points that were detected as (short) flatline with 1 + # * add this to data_filter_function_05 + + # call function + # use only valid mp + # write logfile (each sample value & whether it is part of flatline or value(s) for median() were taken from flatline) + # this function needs to check once that: + # length(simGCsampleTimeSec) > 0 + # simGCsampleTimeSec[1] is larger than simGCtimespanBeforeAfterSampleSec + # simGCsampleTimeSec[last] is less than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec" + # simGCsampleTimeSec[1] is lowest value of simGCsampleTimeSec + # simGCsampleTimeSec[last] is largest value of simGCsampleTimeSec + # then: loop over valid mp and run sampling + # rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values + # new DF: simGCsamplesDF +} + +######################################################################################### +############### 7 create DF with only valid measurement periods ######################### print("") print(">> create DF with only valid measurement periods") +# get the respective DF +if (simulateGCdata == "F"){ + measPeriodN2oSection7inDF <- measPeriodN2oCorr +} else if (simulateGCdata == "T"){ + measPeriodN2oSection7inDF <- simGCsamplesDF +} + # collect all rows of invalid mp rowsAllInvalidMP <- vector() listAllInvalidMP <- vector() @@ -271,7 +304,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ # only invalid mp if (measPeriodMetaDataDF$is_mp_valid[row] == 0){ measID <- measPeriodMetaDataDF$meas_ID[row] - rowsInvalidMP <- which(measPeriodN2oCorr$meas_ID == measID) + rowsInvalidMP <- which(measPeriodN2oSection7inDF$meas_ID == measID) rowsAllInvalidMP <- c(rowsAllInvalidMP, rowsInvalidMP) listAllInvalidMP <- c(listAllInvalidMP, measID) } @@ -280,24 +313,24 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ # copy only valid mp to new DF if (length(rowsAllInvalidMP) > 0){ - measPeriodN2oCorr_validonly <- measPeriodN2oCorr[-rowsAllInvalidMP, ] + measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF[-rowsAllInvalidMP, ] # print number of mp from DF - print(paste0(" number of valid and invalid mp: ", length(unique(measPeriodN2oCorr$meas_ID)))) - print(paste0(" number of valid mp: ", length(unique(measPeriodN2oCorr_validonly$meas_ID)))) + print(paste0(" number of valid and invalid mp: ", length(unique(measPeriodN2oSection7inDF$meas_ID)))) + print(paste0(" number of valid mp: ", length(unique(measPeriodN2oSection7outDF$meas_ID)))) print(paste0(" list of ", length(listAllInvalidMP)," invalid mp: ")) print(listAllInvalidMP) - # sequence row number of measPeriodN2oCorr_validonly to 1:x - rownames(measPeriodN2oCorr_validonly) <- seq(1:nrow(measPeriodN2oCorr_validonly)) + # sequence row number of measPeriodN2oSection7outDF to 1:x + rownames(measPeriodN2oSection7outDF) <- seq(1:nrow(measPeriodN2oSection7outDF)) } else{ - measPeriodN2oCorr_validonly <- measPeriodN2oCorr + measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF print(" did not find any invalid mp") } ######################################################################################### -############### 7 end of pre-processing & pass data DF to new DF used for calculations ## +############### 8 end of pre-processing & pass data DF to new DF used for calculations ## # create DF used with analysis and calculations -ppN2OdataDF <- measPeriodN2oCorr_validonly +ppN2OdataDF <- measPeriodN2oSection7outDF # =============================================================================================== # -- GitLab From 35cdd7e15b41275fb01363765223ad02716f3f49 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 30 Jan 2024 21:40:00 +0100 Subject: [PATCH 3/9] add additional column to N2O DF to mark short flatlines * added in section 4 * column name: detected_flatline * values: * 0 = no flatline detected [default] * 1 = flatline detected * the 'data_filter_function_05' in correct_filter_n2o_conc_by_meas_period.R does modify the column value * tested: works --- .../functions/correct_filter_n2o_conc_by_meas_period.R | 8 ++++++-- preprocessing/main_script.R | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R index b407981..41dbac0 100644 --- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R +++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R @@ -419,13 +419,17 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID, } } - # remove rows that are part of flat lines, or set them to -9999 and interpolate linearely with function 06 + # either remove rows that are part of flat lines + # or set them to 'constantToReplaceFlatLines_short' and interpolate linearely with function 06 if (sum(measurementsFlatLine) > 0){ flatLineRowsRemove <- which(measurementsFlatLine > 0) if (removeFlatLines_short == "T"){ measPeriodN2o_mID <- measPeriodN2o_mID[-flatLineRowsRemove,] } else if(removeFlatLines_short == "F"){ - measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove] <- constantToReplaceFlatLines_short + # mark rows for 'data_filter_function_06' with a specific constant + measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove] <- constantToReplaceFlatLines_short + # mark rows in an additional column for later analysis + measPeriodN2o_mID$detected_flatline[flatLineRowsRemove] <- 1 } } diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index f485973..e859f47 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -203,6 +203,10 @@ source("functions/correct_filter_n2o_conc_by_meas_period.R") measPeriodN2oCorr <- data.frame() # create logfile name logfile_n2o_DataCorrection <- paste0("n2o_dataCorrection_for_measCamp_", measCamp, "_", currentDateTime, ".log") +# add additional column to mark detected short flat lines [0,1] (see 'data_filter_function_05') +newColumn <- c(rep(0, nrow(measPeriodN2o))) +measPeriodN2o$detected_flatline <- newColumn +remove(newColumn) # loop over all mp with N2O data for (row in 1:nrow(measPeriodMetaDataDF)){ # for now only with Aeris data -- GitLab From b32b70005e72d0345b8bee123bca5a99ab570ee5 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 30 Jan 2024 23:10:19 +0100 Subject: [PATCH 4/9] add new file with function for gas-concentration sampling * no functional code, just function call and empty function --- ..._concentrations_for_gc_simulation_per_mp.R | 14 ++++++++++ preprocessing/main_script.R | 28 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R new file mode 100644 index 0000000..2965478 --- /dev/null +++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R @@ -0,0 +1,14 @@ +# sample specific data from gas concentration measurements with PGA +# sampling per measurement period +# return DF with only these few sampled data + +f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF, + measID, + mainDir, + logfile_gas_conc_sampling, + username){ + + + + +} \ No newline at end of file diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index e859f47..acad2c1 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -264,7 +264,14 @@ if (process_n2o == "T"){ ############### 6 create DF with only valid measurement periods ######################### if (simulateGCdata == "T"){ print("") - print(">> sample data for GC simulation") + print(">> start sampling data for GC simulation") + + # gas-concentration data sampling for simulation of GC (gas chromatograph) + source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R") + # create new empty DF for sampled data + simGCsamplesDF <- data.frame() + # create logfile name + logfile_simGC_gas_conc_data_sampling <- paste0("sampling_gas_concentrations_for_gc_simulation_", measCamp, "_", currentDateTime, ".log") # TODO implement: # * add a new column to the measPeriodN2o DF: @@ -284,6 +291,25 @@ if (simulateGCdata == "T"){ # then: loop over valid mp and run sampling # rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values # new DF: simGCsamplesDF + + + # loop over all mp with N2O data + for (row in 1:nrow(measPeriodMetaDataDF)){ + # for now only with Aeris data + if (measPeriodMetaDataDF$is_n2o_aeris[row] == 1){ + measID <- measPeriodMetaDataDF$meas_ID[row] + rowsGasConcData <- which(measPeriodN2oCorr$meas_ID == measID) + # returns data of this mp for the new DF + simGCsamplesDFmp <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,], + measID, + mainDir, + logfile_simGC_gas_conc_data_sampling, + username) + simGCsamplesDF <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp)) + } + } + # sequence row number of simGCsamplesDF to 1:x + rownames(simGCsamplesDF) <- seq(1:nrow(simGCsamplesDF)) } ######################################################################################### -- GitLab From 13b3ba20a07182f80255a97d905dff6a05422be7 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Sat, 3 Feb 2024 23:22:50 +0100 Subject: [PATCH 5/9] add code to run data sampling for simGC * add code to run the 'f_sample_gas_concentrations_for_gc_simulation_per_mp' * but the function itself is not yet doing what it is supposed to do * minor modification in 'functions/correct_filter_n2o_conc_by_meas_period.R' * add a test for simGC: if at least one function is called * add function 5 to the list of functions that need to run * because the detection of flatlines is needed * add new option 'flatLineInterpolationEnabled': * if flatlines are not interpolated (function 06) then the gas concentration values are not replaced by a constant, which is needed for simGC * tested with and w/o simGC: works with measCamp 202307 * not running print functions in sec 4 and actual flux calc --- .../additional_info/simulate_gc_n2o_data.txt | 9 +- .../correct_filter_n2o_conc_by_meas_period.R | 28 ++++- ..._concentrations_for_gc_simulation_per_mp.R | 12 ++ preprocessing/main_script.R | 109 +++++++++++------- 4 files changed, 112 insertions(+), 46 deletions(-) diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt index 8fe486c..fe93b58 100644 --- a/preprocessing/additional_info/simulate_gc_n2o_data.txt +++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt @@ -1,5 +1,12 @@ +# output directory for logfile +if (username == "ntriches"){ + simGCsampleDatainfoDir <- paste0(outDir, "/sim_gc_sample_data_info") +} else if (username == "jengel"){ + simGCsampleDatainfoDir <- paste0(outDir, "/sim_gc_sample_data_info") +} + # list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods' -simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12) +simGCfilterCorrFunctions <- c(1, 2, 3, 4, 5, 8, 9, 10, 11, 12) # min length of the mp to be used for "GC simulation" simGCminLengthMPSec <- 560 # [seconds] NOTE the 'data_filter_function_02' might remove some datapoints of each mp diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R index 41dbac0..d2ea39e 100644 --- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R +++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R @@ -10,7 +10,7 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF print(paste0(">> start N2O data filter/correction for mp: ", measID)) if (simulateGCdata == "T"){ - print(">> simulateGCdata == TRUE, running only specific filter/correction functions!") + print(">> simulateGCdata == TRUE, running only specific filter/correction functions!") } # sequence row numbers of measPeriodN2o_mID to 1:x @@ -22,6 +22,14 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions') source("additional_info/simulate_gc_n2o_data.txt") + # test if there is at least one filter function defined + if (simulateGCdata == "T"){ + if (length(simGCfilterCorrFunctions) < 1){ + print("ERROR no filter function defined was defined for GC simulation in simulate_gc_n2o_data.txt -> simGCfilterCorrFunctions") + stop("ERROR stopping the script") + } + } + # absolute path to logfile logfile_n2o_DataCorrection <- paste0(warningDir, "/", logfile_n2o_DataCorrection) @@ -77,11 +85,18 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF # by default do not remove these measurements, and interpolate them with function 06 # if number of measurements exceeds maxProportionFlatLines: set mp invalid if (measPeriodMetaDataDF_mID$is_mp_valid > 0){ - if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){ + # the interpolation of flatlines might be disabled for simGC (GC simulation) + flatLineInterpolationEnabled <- "T" + if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions)){ + # in case interpolation of flatlines is disabled (with simGC), then the gas concentration values of flatlines are not replaced by 'constantToReplaceFlatLines_short' + if (simulateGCdata == "T" & ! 6 %in% simGCfilterCorrFunctions){ + flatLineInterpolationEnabled <- "F" + } list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID, measID, logfile_n2o_DataCorrection, maxDiffFlatLineFromMedian_short, minFlatLineLengthSec, - removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines) + removeFlatLines_short, flatLineInterpolationEnabled, + constantToReplaceFlatLines_short, maxProportionFlatLines) measPeriodMetaDataDF_mID <- as.data.frame(list_hlp[[1]]) measPeriodN2o_mID <- as.data.frame(list_hlp[[2]]) } @@ -345,6 +360,7 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID, maxDiffFlatLineFromMedian_short, minFlatLineLengthSec, removeFlatLines_short, + flatLineInterpolationEnabled, constantToReplaceFlatLines_short, maxProportionFlatLines){ @@ -426,8 +442,10 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID, if (removeFlatLines_short == "T"){ measPeriodN2o_mID <- measPeriodN2o_mID[-flatLineRowsRemove,] } else if(removeFlatLines_short == "F"){ - # mark rows for 'data_filter_function_06' with a specific constant - measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove] <- constantToReplaceFlatLines_short + # mark rows for "flatline interpolation" ('data_filter_function_06') with a specific constant, if interpolation is enabled + if (flatLineInterpolationEnabled == "T"){ + measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove] <- constantToReplaceFlatLines_short + } # mark rows in an additional column for later analysis measPeriodN2o_mID$detected_flatline[flatLineRowsRemove] <- 1 } diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R index 2965478..e99ba25 100644 --- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R +++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R @@ -8,7 +8,19 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF logfile_gas_conc_sampling, username){ + print(paste0(">> start data sampling for GC simulation for mp: ", measID)) + # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions') + source("additional_info/simulate_gc_n2o_data.txt") + + + + + + # NOTE: logfile with + # one line: measID, sample_time_sec, median_value, sample_time_is_flatline, value_of_median_calc_is_flatline + + return(mpGasConcDataDF[1,]) } \ No newline at end of file diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index 72a6a08..0c7761f 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -262,45 +262,75 @@ if (simulateGCdata == "T"){ print(">> start sampling data for GC simulation") # gas-concentration data sampling for simulation of GC (gas chromatograph) + # this makes available the "additional_info/simulate_gc_n2o_data.txt" source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R") + # create new empty DF for sampled data simGCsamplesDF <- data.frame() - # create logfile name + + # create dir for logfile, if not present + if (!file.exists(simGCsampleDatainfoDir)) { + dir.create(simGCsampleDatainfoDir, showWarnings = TRUE, recursive = FALSE, mode = "0755") + } + + # create logfile name and full path logfile_simGC_gas_conc_data_sampling <- paste0("sampling_gas_concentrations_for_gc_simulation_", measCamp, "_", currentDateTime, ".log") + logfile_simGC_gas_conc_data_sampling <- paste0(simGCsampleDatainfoDir, "/", logfile_simGC_gas_conc_data_sampling) - # TODO implement: - # * add a new column to the measPeriodN2o DF: - # * values [0,1] - # * mark all data points that were detected as (short) flatline with 1 - # * add this to data_filter_function_05 - - # call function - # use only valid mp - # write logfile (each sample value & whether it is part of flatline or value(s) for median() were taken from flatline) - # this function needs to check once that: - # length(simGCsampleTimeSec) > 0 - # simGCsampleTimeSec[1] is larger than simGCtimespanBeforeAfterSampleSec - # simGCsampleTimeSec[last] is less than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec" - # simGCsampleTimeSec[1] is lowest value of simGCsampleTimeSec - # simGCsampleTimeSec[last] is largest value of simGCsampleTimeSec - # then: loop over valid mp and run sampling - # rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values - # new DF: simGCsamplesDF + # test if settings for simGC are valid + # number of samples must be larger zero + if (length(simGCsampleTimeSec) < 1){ + print("ERROR no samples were defined in simulate_gc_n2o_data.txt -> simGCsampleTimeSec") + stop("ERROR stopping the script") + } + # 'simGCsampleTimeSec[1]' is larger than simGCtimespanBeforeAfterSampleSec + if (simGCsampleTimeSec[1] < (simGCtimespanBeforeAfterSampleSec + 1)){ + print(paste0("ERROR the first simGC sample is too close to the start of the mp, closer than possible for median() calculation: ", (simGCtimespanBeforeAfterSampleSec + 1))) + stop("ERROR stopping the script") + } + # 'simGCsampleTimeSec[last]' is larger than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec" + if (simGCsampleTimeSec[length(simGCsampleTimeSec)] > (simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec - 1)){ + print(paste0("ERROR the last simGC sample is too close to the end of the mp, closer than possible for median() calculation: ", (simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec - 1))) + stop("ERROR stopping the script") + } + # values of 'simGCsampleTimeSec' are increasing + if (length(simGCsampleTimeSec) > 1){ + print(" simGC sampling is done per mp at [time in seconds]:") + for (i in 1:(length(simGCsampleTimeSec) - 1)){ + if (simGCsampleTimeSec[i + 1] > simGCsampleTimeSec[i]){ + print(paste0(" ", simGCsampleTimeSec[i])) + } else { + print(paste0("ERROR values of simGCsampleTimeSec are NOT increasing: ", simGCsampleTimeSec)) + stop("ERROR stopping the script") + } + } + # print the last sample value + print(paste0(" ", simGCsampleTimeSec[length(simGCsampleTimeSec)])) + } + # write header in logfile + write("measID sample_time_sec median_value sample_time_is_flatline value_of_median_calc_is_flatline", + file = logfile_simGC_gas_conc_data_sampling, sep = " ") # loop over all mp with N2O data for (row in 1:nrow(measPeriodMetaDataDF)){ - # for now only with Aeris data - if (measPeriodMetaDataDF$is_n2o_aeris[row] == 1){ + # only valid mp and for now only with Aeris data + if (measPeriodMetaDataDF$is_mp_valid[row] == 1 & measPeriodMetaDataDF$is_n2o_aeris[row] == 1){ measID <- measPeriodMetaDataDF$meas_ID[row] rowsGasConcData <- which(measPeriodN2oCorr$meas_ID == measID) - # returns data of this mp for the new DF - simGCsamplesDFmp <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,], - measID, - mainDir, - logfile_simGC_gas_conc_data_sampling, - username) - simGCsamplesDF <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp)) + # only mp with a length of at least 'simGCminLengthMPSec' + dateTimeFirstMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[1]] + dateTimeLastMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[length(rowsGasConcData)] + timeLenghtMPSec <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs"))) + if (timeLenghtMPSec > (simGCminLengthMPSec - 1)){ + # returns data of this mp for the new DF + simGCsamplesDFmp <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,], + measID, + mainDir, + logfile_simGC_gas_conc_data_sampling, + username) + simGCsamplesDF <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp)) + } } } # sequence row number of simGCsamplesDF to 1:x @@ -311,11 +341,11 @@ if (simulateGCdata == "T"){ print("") print(">> create DF with only valid measurement periods") -# get the respective DF +# get the gas concentration DF: either all data or the samples for the GC simulation if (simulateGCdata == "F"){ - measPeriodN2oSection7inDF <- measPeriodN2oCorr + selValidMPinputDF <- measPeriodN2oCorr } else if (simulateGCdata == "T"){ - measPeriodN2oSection7inDF <- simGCsamplesDF + selValidMPinputDF <- simGCsamplesDF } # collect all rows of invalid mp @@ -328,7 +358,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ # only invalid mp if (measPeriodMetaDataDF$is_mp_valid[row] == 0){ measID <- measPeriodMetaDataDF$meas_ID[row] - rowsInvalidMP <- which(measPeriodN2oSection7inDF$meas_ID == measID) + rowsInvalidMP <- which(selValidMPinputDF$meas_ID == measID) rowsAllInvalidMP <- c(rowsAllInvalidMP, rowsInvalidMP) listAllInvalidMP <- c(listAllInvalidMP, measID) } @@ -337,24 +367,23 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ # copy only valid mp to new DF if (length(rowsAllInvalidMP) > 0){ - measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF[-rowsAllInvalidMP, ] + selValidMPoutputDF <- selValidMPinputDF[-rowsAllInvalidMP, ] # print number of mp from DF - print(paste0(" number of valid and invalid mp: ", length(unique(measPeriodN2oSection7inDF$meas_ID)))) - print(paste0(" number of valid mp: ", length(unique(measPeriodN2oSection7outDF$meas_ID)))) + print(paste0(" number of valid and invalid mp: ", length(unique(selValidMPinputDF$meas_ID)))) + print(paste0(" number of valid mp: ", length(unique(selValidMPoutputDF$meas_ID)))) print(paste0(" list of ", length(listAllInvalidMP)," invalid mp: ")) print(listAllInvalidMP) - # sequence row number of measPeriodN2oSection7outDF to 1:x - rownames(measPeriodN2oSection7outDF) <- seq(1:nrow(measPeriodN2oSection7outDF)) + # sequence row number of selValidMPoutputDF to 1:x + rownames(selValidMPoutputDF) <- seq(1:nrow(selValidMPoutputDF)) } else{ - measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF + selValidMPoutputDF <- selValidMPinputDF print(" did not find any invalid mp") } # 8. Pass data DF to new DF used for calculations ---- # create DF used with analysis and calculations -ppN2OdataDF <- measPeriodN2oSection7outDF - +ppN2OdataDF <- selValidMPoutputDF # =============================================================================================== # # ------------------------------- End data pre-processing ----------------------------------------- -- GitLab From db2ef83c1e94a2d22ad091cc7a25138959a6843c Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 7 Feb 2024 22:59:02 +0100 Subject: [PATCH 6/9] finished implementation of sampling gas concentration data for GC simulation * tested: * the created DF looks good * the logfile contains the expected info --- ..._concentrations_for_gc_simulation_per_mp.R | 58 +++++++++++++++++-- preprocessing/main_script.R | 2 +- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R index e99ba25..7a76543 100644 --- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R +++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R @@ -13,14 +13,60 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions') source("additional_info/simulate_gc_n2o_data.txt") + # sequence row numbers of mpGasConcDataDF to 1:x + rownames(mpGasConcDataDF) <- seq(1:nrow(mpGasConcDataDF)) + # get date & time of first measurement + dateTimeFirstMeasurement <- mpGasConcDataDF$datetime_datalogger[1] + # row numbers that contain the samples + sampleRows <- rep(NA, length(simGCsampleTimeSec)) + # loop over sample times + for (i in 1:length(simGCsampleTimeSec)){ + sampleTimeSec <- simGCsampleTimeSec[i] + # get date/time of sample + absoluteSampleTime <- dateTimeFirstMeasurement + as.difftime(sampleTimeSec, units="secs") + # get rows of measurements used for calc the median + measurementsStartTime <- absoluteSampleTime - as.difftime((simGCtimespanBeforeAfterSampleSec + 1), units="secs") + measurementsLastTime <- absoluteSampleTime + as.difftime((simGCtimespanBeforeAfterSampleSec + 1), units="secs") + measurementRows <- which(mpGasConcDataDF$datetime_datalogger > measurementsStartTime & + mpGasConcDataDF$datetime_datalogger < measurementsLastTime) + # get row of this sample + sampleRowNumber <- which(mpGasConcDataDF$datetime_datalogger == absoluteSampleTime) + # if there is no measurement at this exact time, use the middle row of the 'measurementRows' + if (length(sampleRowNumber) < 1){ + sampleRowNumber <- floor(median(measurementRows)) + } + # save row numer + sampleRows[i] <- sampleRowNumber + # calc median of measurements of N2O gas concentration + sampleGasConcMedian <- median(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE) + # add new value to DF + mpGasConcDataDF$n2oppm_aeris[sampleRowNumber] <- sampleGasConcMedian + # calc standard deviation of measurements of N2O gas concentration + sampleGasConcSD <- sd(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE) + # test if one of the samples is part of a short flatline + sample_time_is_flatline <- "F" + any_meas_of_median_calc_is_flatline <- "F" + if (mpGasConcDataDF$detected_flatline[sampleRowNumber] > 0){ + sample_time_is_flatline <- "T" + } + if (sum(mpGasConcDataDF$detected_flatline[measurementRows]) > 0){ + any_meas_of_median_calc_is_flatline <- "T" + } + # write one line to logfile with columns: + # measID sample_time_sec median_value median_value_sd sample_time_is_flatline any_meas_of_median_calc_is_flatline + write(paste0(measID, " ", + sampleTimeSec, " ", + sampleGasConcMedian, " ", + sampleGasConcSD, " ", + sample_time_is_flatline, " ", + any_meas_of_median_calc_is_flatline), + file = logfile_gas_conc_sampling, sep = " ", append = TRUE) + } + # return only the sample rows + mpGasConcDataDF <- mpGasConcDataDF[sampleRows,] - - - # NOTE: logfile with - # one line: measID, sample_time_sec, median_value, sample_time_is_flatline, value_of_median_calc_is_flatline - - return(mpGasConcDataDF[1,]) + return(mpGasConcDataDF) } \ No newline at end of file diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index 0c7761f..9b8d627 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -309,7 +309,7 @@ if (simulateGCdata == "T"){ } # write header in logfile - write("measID sample_time_sec median_value sample_time_is_flatline value_of_median_calc_is_flatline", + write("measID sample_time_sec median_value median_value_sd sample_time_is_flatline any_meas_of_median_calc_is_flatline", file = logfile_simGC_gas_conc_data_sampling, sep = " ") # loop over all mp with N2O data -- GitLab From 9e40e3e179423b66698d2d8ef3fbd74bccdbe11b Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 7 Feb 2024 23:10:45 +0100 Subject: [PATCH 7/9] minor docu improvements --- .../sample_gas_concentrations_for_gc_simulation_per_mp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R index 7a76543..98e3e42 100644 --- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R +++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R @@ -33,11 +33,11 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF mpGasConcDataDF$datetime_datalogger < measurementsLastTime) # get row of this sample sampleRowNumber <- which(mpGasConcDataDF$datetime_datalogger == absoluteSampleTime) - # if there is no measurement at this exact time, use the middle row of the 'measurementRows' + # if there is no measurement at this exact time, use the center-row of the 'measurementRows' if (length(sampleRowNumber) < 1){ sampleRowNumber <- floor(median(measurementRows)) } - # save row numer + # save row number sampleRows[i] <- sampleRowNumber # calc median of measurements of N2O gas concentration sampleGasConcMedian <- median(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE) -- GitLab From a898c97e7aa9dfe455d5494f9718b957c04e2814 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 14 Feb 2024 23:41:24 +0100 Subject: [PATCH 8/9] bugfix in calculation of mp length for simGC function --- preprocessing/main_script.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R index 2b76f45..7785002 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_script.R @@ -322,8 +322,8 @@ if (simulateGCdata == "T"){ rowsGasConcData <- which(measPeriodN2oCorr$meas_ID == measID) # only mp with a length of at least 'simGCminLengthMPSec' dateTimeFirstMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[1]] - dateTimeLastMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[length(rowsGasConcData)] - timeLenghtMPSec <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs"))) + dateTimeLastMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[length(rowsGasConcData)]] + timeLenghtMPSec <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs"))) if (timeLenghtMPSec > (simGCminLengthMPSec - 1)){ # returns data of this mp for the new DF simGCsamplesDFmp <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,], -- GitLab From c1716031227a96ca56659136f33ea5c408fdec21 Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Thu, 15 Feb 2024 10:37:23 +0200 Subject: [PATCH 9/9] test GA-PGA comparison and change values in txt file * works well for Sept and July, apparently now for May, too :) --- preprocessing/additional_info/simulate_gc_n2o_data.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt index fe93b58..3d1eb18 100644 --- a/preprocessing/additional_info/simulate_gc_n2o_data.txt +++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt @@ -13,7 +13,7 @@ simGCminLengthMPSec <- 560 # [seconds] NOTE the 'data_filter_func # time in seconds the GC samples are taken (per mp; from mp start) # this also defines the number of samples -simGCsampleTimeSec <- c(100, 200, 300, 400, 500) # time [s] after mp start for taking samples +simGCsampleTimeSec <- c(60, 180, 300, 420, 550) # time [s] after mp start for taking samples # time in seconds before and after a sampling point to calculate (median) sampling value simGCtimespanBeforeAfterSampleSec <- 5 # timespan before and after a sample used for calc of sample value -- GitLab