From 5b9fa7c15d4bbc2a8b2cbe61805dadde8a042b62 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Sun, 28 Jan 2024 21:31:22 +0100
Subject: [PATCH 1/9] add new "simulate GC" info file, and add new IF
 statements for filter/correction

* add new option in main_script.R
  * use the PGA data to simulate GC (gas chromatograph) data, and run only specific filter/correction functions
  * simulateGCdata <- "F"   # [F,T]
* add new additional_info/simulate_gc_n2o_data.txt containing, for now:
  * list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods'
  * simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12)
* add new IF statements to 'f_correct_n2o_conc_sorted_by_measurementperiods'
  to run only selected functions if simulateGCdata == "T"
* tested: works
---
 .../additional_info/simulate_gc_n2o_data.txt  |   3 +
 .../correct_filter_n2o_conc_by_meas_period.R  | 125 +++++++++++-------
 preprocessing/main_script.R                   |   5 +-
 3 files changed, 85 insertions(+), 48 deletions(-)
 create mode 100644 preprocessing/additional_info/simulate_gc_n2o_data.txt

diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt
new file mode 100644
index 0000000..e04cf8e
--- /dev/null
+++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt
@@ -0,0 +1,3 @@
+# list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods'
+simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12)
+
diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
index 3227f3e..b407981 100644
--- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
+++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
@@ -1,20 +1,27 @@
 # main filter script for correcting GHG concentrations before flux calculation
 
 f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF_mID,
-                                                          measPeriodN2o_mID,
-                                                          measID,
-                                                          mainDir,
-                                                          logfile_n2o_DataCorrection,
-                                                          username){
+                                                            measPeriodN2o_mID,
+                                                            measID,
+                                                            mainDir,
+                                                            logfile_n2o_DataCorrection,
+                                                            username,
+                                                            simulateGCdata){
 
   print(paste0(">> start N2O data filter/correction for mp: ", measID))
+  if (simulateGCdata == "T"){
+    print(">> simulateGCdata == TRUE, running only specific filter/correction functions!")
+  }
 
   # sequence row numbers of measPeriodN2o_mID to 1:x
   rownames(measPeriodN2o_mID) <- seq(1:nrow(measPeriodN2o_mID))
 
-  # load parameters needed for correcting and filtering concentrations 
+  # load parameters needed for correcting and filtering concentrations
   source("additional_info/correction_filter_n2o.txt")
 
+  # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions')
+  source("additional_info/simulate_gc_n2o_data.txt")
+
   # absolute path to logfile
   logfile_n2o_DataCorrection <- paste0(warningDir, "/", logfile_n2o_DataCorrection)
 
@@ -28,32 +35,40 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF
   # 01 test for min and max length of measurement period
   #    if thresholds are exceeded: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodMetaDataDF_mID <- data_filter_function_01(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                        measID, logfile_n2o_DataCorrection,
-                                                        minLengthMPSec, maxLengthMPSec)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 1 %in% simGCfilterCorrFunctions)){
+      measPeriodMetaDataDF_mID <- data_filter_function_01(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                          measID, logfile_n2o_DataCorrection,
+                                                          minLengthMPSec, maxLengthMPSec)
+    }
   }
 
   # 02 remove data points (DF rows) within specific time-interval at start and end of measurement period
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_02(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 startRemoveDataSec, endRemoveDataSec)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 2 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_02(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   startRemoveDataSec, endRemoveDataSec)
+    }
   }
 
   # 03 test if N2O measurements always have the same concentration within threshold (e.g. (instrument mistake))
   #    if yes: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodMetaDataDF_mID <- data_filter_function_03(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                        measID, logfile_n2o_DataCorrection,
-                                                        maxDiffFlatLine_mp)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 3 %in% simGCfilterCorrFunctions)){
+      measPeriodMetaDataDF_mID <- data_filter_function_03(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                          measID, logfile_n2o_DataCorrection,
+                                                          maxDiffFlatLine_mp)
+    }
   }
 
   # 04 test if PAR changes abruptly within specific time period
   #    if yes: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodMetaDataDF_mID <- data_filter_function_04(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                        measID, logfile_n2o_DataCorrection,
-                                                        maxPARChange, changeWithinTimeSec)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 4 %in% simGCfilterCorrFunctions)){
+      measPeriodMetaDataDF_mID <- data_filter_function_04(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                          measID, logfile_n2o_DataCorrection,
+                                                          maxPARChange, changeWithinTimeSec)
+    }
   }
 
   # 05 detect short N2O flatlines
@@ -62,69 +77,85 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF
   #               by default do not remove these measurements, and interpolate them with function 06
   #    if number of measurements exceeds maxProportionFlatLines: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                        measID, logfile_n2o_DataCorrection,
-                                        maxDiffFlatLineFromMedian_short, minFlatLineLengthSec,
-                                        removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines)
-    measPeriodMetaDataDF_mID  <- as.data.frame(list_hlp[[1]])
-    measPeriodN2o_mID         <- as.data.frame(list_hlp[[2]])
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){
+      list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                          measID, logfile_n2o_DataCorrection,
+                                          maxDiffFlatLineFromMedian_short, minFlatLineLengthSec,
+                                          removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines)
+      measPeriodMetaDataDF_mID  <- as.data.frame(list_hlp[[1]])
+      measPeriodN2o_mID         <- as.data.frame(list_hlp[[2]])
+    }
   }
 
   # 06 linear interpolation of short N2O flatlines
   #    only if these short flatlines were not removed with function 05 ('removeFlatLines_short == F')
   if (removeFlatLines_short == "F" & measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_06(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 constantToReplaceFlatLines_short, timePeriodAroundFlatlinesSec,
-                                                 minDistanceBetweenFlatlinesSec)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_06(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   constantToReplaceFlatLines_short, timePeriodAroundFlatlinesSec,
+                                                   minDistanceBetweenFlatlinesSec)
+    }
   }
 
   # 07 correct / interpolation N2O values beyond reasonable values
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_07(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 minValueN2O, maxValueN2O,
-                                                 timePeriodAroundOutliersSec)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 7 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_07(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   minValueN2O, maxValueN2O,
+                                                   timePeriodAroundOutliersSec)
+    }
   }
 
   # 08 set unreasonable values of soil temperature to NA
   #    4 columns from 4 sensors
   #    column names must contain "soilT" 
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_08(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 minValueTSoilDegC, maxValueTSoilDegC)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 8 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_08(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   minValueTSoilDegC, maxValueTSoilDegC)
+    }
   }
 
   # 09 set unreasonable values of "VWC soil 12 cm" to NA (volumetric water content at 12 cm depth)
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_09(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 minValueVWCsoil12, maxValueVWCsoil12)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 9 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_09(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   minValueVWCsoil12, maxValueVWCsoil12)
+    }
   }
 
   # 10 set unreasonable values of "VWC soil 30 cm" to NA (volumetric water content at 30 cm depth)
   #    all values would be 0 in case the sensor-device could not be inserted into the soil because of frozen soil
   #      -> add to logfile as "frozen soil [F,T]"
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_10(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 minValueVWCsoil30, maxValueVWCsoil30)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 10 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_10(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   minValueVWCsoil30, maxValueVWCsoil30)
+    }
   }
 
   # 11 set PAR negative values to zero, and unreasonably high values to NA
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodN2o_mID <- data_filter_function_11(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                 measID, logfile_n2o_DataCorrection,
-                                                 minValuePAR, maxValuePAR)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 11 %in% simGCfilterCorrFunctions)){
+      measPeriodN2o_mID <- data_filter_function_11(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                   measID, logfile_n2o_DataCorrection,
+                                                   minValuePAR, maxValuePAR)
+    }
   }
 
   # 12 test for negative air temperatures in the chamber ('chamberTdegC')
   #    if yes: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    measPeriodMetaDataDF_mID <- data_filter_function_12(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
-                                                        measID, logfile_n2o_DataCorrection,
-                                                        minValueTairChamberDegC)
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 12 %in% simGCfilterCorrFunctions)){
+      measPeriodMetaDataDF_mID <- data_filter_function_12(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
+                                                          measID, logfile_n2o_DataCorrection,
+                                                          minValueTairChamberDegC)
+    }
   }
 
   return(list(measPeriodMetaDataDF_mID, measPeriodN2o_mID))
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index 013d266..7445e56 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -40,6 +40,8 @@ if (measCamp == "202307") process_aeris_raw_data <- "T"    ## read raw data from
 # plot figures [T,F]
 create_plots_ghg_conc <- "F"
 
+# use the PGA data to simulate GC data, and run only specific filter/correction functions
+simulateGCdata <- "F"   # [F,T]
 
 
 # =============================================================================================== #
@@ -213,7 +215,8 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
                                                                    measID,
                                                                    mainDir,
                                                                    logfile_n2o_DataCorrection,
-                                                                   username)
+                                                                   username,
+                                                                   simulateGCdata)
     # extract vector and data.frame from list
     measPeriodMetaDataDF[row,]  <- as.data.frame(list_hlp[[1]])
     measPeriodN2oCorr           <- rbind(measPeriodN2oCorr, as.data.frame(list_hlp[[2]]))
-- 
GitLab


From ae5c7c3b0abc7f180b1f93a8f5cfdc6c43221b33 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Sun, 28 Jan 2024 22:28:58 +0100
Subject: [PATCH 2/9] add parameters to simGC info file and prepare simGC
 sampling

* add new section 6 in main_script.R
  * this will call the simGC sampling function
* modify section 7 to use either the GC or PGA DF
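* tested: works with simulateGCdata <- "F" only
  * simulateGCdata <- "T" cannot work yet: section 7 reads 'simGCsamplesDF',
    which is not created by any code in this patch (sampling is still a TODO)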
---
 .../additional_info/simulate_gc_n2o_data.txt  | 11 +++-
 preprocessing/main_script.R                   | 51 +++++++++++++++----
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt
index e04cf8e..8fe486c 100644
--- a/preprocessing/additional_info/simulate_gc_n2o_data.txt
+++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt
@@ -1,3 +1,12 @@
 # list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods'
-simGCfilterCorrFunctions <- c(1, 2, 3, 4, 8, 9, 10, 11, 12)
+simGCfilterCorrFunctions          <- c(1, 2, 3, 4, 8, 9, 10, 11, 12)
 
+# min length of the mp to be used for "GC simulation"
+simGCminLengthMPSec               <- 560  # [seconds] NOTE the 'data_filter_function_02' might remove some datapoints of each mp
+
+# time in seconds the GC samples are taken (per mp; from mp start)
+# this also defines the number of samples
+simGCsampleTimeSec                <- c(100, 200, 300, 400, 500)  # time [s] after mp start for taking samples
+
+# time in seconds before and after a sampling point to calculate (median) sampling value
+simGCtimespanBeforeAfterSampleSec <- 5   # timespan before and after a sample used for calc of sample value
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index 7445e56..f485973 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -258,9 +258,42 @@ if (process_n2o == "T"){
 
 #########################################################################################
 ############### 6 create DF with only valid measurement periods #########################
+if (simulateGCdata == "T"){
+  print("")
+  print(">> sample data for GC simulation")
+
+  # TODO implement:
+    # * add a new column to the measPeriodN2o DF:
+    #   * values [0,1] 
+    #   * mark all data points that were detected as (short) flatline with 1
+    #   * add this to data_filter_function_05
+
+  # call function
+  #   use only valid mp
+  #   write logfile (each sample value & whether it is part of flatline or value(s) for median() were taken from flatline)
+  #   this function needs to check once that:
+    # length(simGCsampleTimeSec) > 0
+    # simGCsampleTimeSec[1] is larger than simGCtimespanBeforeAfterSampleSec
+    # simGCsampleTimeSec[last] is less than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec"
+    # simGCsampleTimeSec[1] is lowest value of simGCsampleTimeSec
+    # simGCsampleTimeSec[last] is largest value of simGCsampleTimeSec
+  # then: loop over valid mp and run sampling
+  #   rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values
+  # new DF: simGCsamplesDF
+}
+
+#########################################################################################
+############### 7 create DF with only valid measurement periods #########################
 print("")
 print(">> create DF with only valid measurement periods")
 
+# get the respective DF
+if (simulateGCdata == "F"){
+  measPeriodN2oSection7inDF <- measPeriodN2oCorr
+} else if (simulateGCdata == "T"){
+  measPeriodN2oSection7inDF <- simGCsamplesDF
+}
+
 # collect all rows of invalid mp
 rowsAllInvalidMP <- vector()
 listAllInvalidMP <- vector()
@@ -271,7 +304,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
     # only invalid mp
     if (measPeriodMetaDataDF$is_mp_valid[row] == 0){
       measID            <- measPeriodMetaDataDF$meas_ID[row]
-      rowsInvalidMP     <- which(measPeriodN2oCorr$meas_ID == measID)
+      rowsInvalidMP     <- which(measPeriodN2oSection7inDF$meas_ID == measID)
       rowsAllInvalidMP  <- c(rowsAllInvalidMP, rowsInvalidMP)
       listAllInvalidMP  <- c(listAllInvalidMP, measID)
     }
@@ -280,24 +313,24 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
 
 # copy only valid mp to new DF
 if (length(rowsAllInvalidMP) > 0){
-  measPeriodN2oCorr_validonly <- measPeriodN2oCorr[-rowsAllInvalidMP, ]
+  measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF[-rowsAllInvalidMP, ]
   # print number of mp from DF
-  print(paste0("   number of valid and invalid mp: ", length(unique(measPeriodN2oCorr$meas_ID))))
-  print(paste0("   number of valid mp:             ", length(unique(measPeriodN2oCorr_validonly$meas_ID))))
+  print(paste0("   number of valid and invalid mp: ", length(unique(measPeriodN2oSection7inDF$meas_ID))))
+  print(paste0("   number of valid mp:             ", length(unique(measPeriodN2oSection7outDF$meas_ID))))
   print(paste0("   list of ", length(listAllInvalidMP)," invalid mp: "))
   print(listAllInvalidMP)
-  # sequence row number of measPeriodN2oCorr_validonly to 1:x
-  rownames(measPeriodN2oCorr_validonly) <- seq(1:nrow(measPeriodN2oCorr_validonly))
+  # sequence row number of measPeriodN2oSection7outDF to 1:x
+  rownames(measPeriodN2oSection7outDF) <- seq(1:nrow(measPeriodN2oSection7outDF))
 } else{
-  measPeriodN2oCorr_validonly <- measPeriodN2oCorr
+  measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF
   print("   did not find any invalid mp")
 }
 
 #########################################################################################
-############### 7 end of pre-processing & pass data DF to new DF used for calculations ##
+############### 8 end of pre-processing & pass data DF to new DF used for calculations ##
 
 # create DF used with analysis and calculations
-ppN2OdataDF <- measPeriodN2oCorr_validonly
+ppN2OdataDF <- measPeriodN2oSection7outDF
 
 
 # =============================================================================================== #
-- 
GitLab


From 35cdd7e15b41275fb01363765223ad02716f3f49 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 30 Jan 2024 21:40:00 +0100
Subject: [PATCH 3/9] add additional column to N2O DF to mark short flatlines

* added in section 4
* column name: detected_flatline
  * values:
    * 0 = no flatline detected [default]
    * 1 = flatline detected
* 'data_filter_function_05' in correct_filter_n2o_conc_by_meas_period.R
  sets the column to 1 for rows detected as part of a short flatline
  (only in the branch removeFlatLines_short == "F")
* tested: works
---
 .../functions/correct_filter_n2o_conc_by_meas_period.R    | 8 ++++++--
 preprocessing/main_script.R                               | 4 ++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
index b407981..41dbac0 100644
--- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
+++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
@@ -419,13 +419,17 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID,
     }
   }
 
-  # remove rows that are part of flat lines, or set them to -9999 and interpolate linearely with function 06
+  # either remove rows that are part of flat lines
+  # or set them to 'constantToReplaceFlatLines_short' and interpolate linearely with function 06
   if (sum(measurementsFlatLine) > 0){
     flatLineRowsRemove <- which(measurementsFlatLine > 0)
     if (removeFlatLines_short == "T"){
       measPeriodN2o_mID <- measPeriodN2o_mID[-flatLineRowsRemove,]
     } else if(removeFlatLines_short == "F"){
-      measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove] <- constantToReplaceFlatLines_short
+      # mark rows for 'data_filter_function_06' with a specific constant
+      measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove]      <- constantToReplaceFlatLines_short
+      # mark rows in an additional column for later analysis
+      measPeriodN2o_mID$detected_flatline[flatLineRowsRemove] <- 1
     }
   }
 
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index f485973..e859f47 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -203,6 +203,10 @@ source("functions/correct_filter_n2o_conc_by_meas_period.R")
 measPeriodN2oCorr <- data.frame()
 # create logfile name
 logfile_n2o_DataCorrection <- paste0("n2o_dataCorrection_for_measCamp_", measCamp, "_", currentDateTime, ".log")
+# add additional column to mark detected short flat lines [0,1] (see 'data_filter_function_05')
+newColumn                       <- c(rep(0, nrow(measPeriodN2o)))
+measPeriodN2o$detected_flatline <- newColumn
+remove(newColumn)
 # loop over all mp with N2O data
 for (row in 1:nrow(measPeriodMetaDataDF)){
   # for now only with Aeris data
-- 
GitLab


From b32b70005e72d0345b8bee123bca5a99ab570ee5 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 30 Jan 2024 23:10:19 +0100
Subject: [PATCH 4/9] add new file with function for gas-concentration sampling

* no functional code yet: only the function call in main_script.R and an empty function body
---
 ..._concentrations_for_gc_simulation_per_mp.R | 14 ++++++++++
 preprocessing/main_script.R                   | 28 ++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R

diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
new file mode 100644
index 0000000..2965478
--- /dev/null
+++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
@@ -0,0 +1,14 @@
+# sample specific data from gas concentration measurements with PGA
+# sampling per measurement period
+# return DF with only these few sampled data
+
+f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF,
+                                                                 measID,
+                                                                 mainDir,
+                                                                 logfile_gas_conc_sampling,
+                                                                 username){
+
+
+
+
+}
\ No newline at end of file
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index e859f47..acad2c1 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -264,7 +264,14 @@ if (process_n2o == "T"){
 ############### 6 create DF with only valid measurement periods #########################
 if (simulateGCdata == "T"){
   print("")
-  print(">> sample data for GC simulation")
+  print(">> start sampling data for GC simulation")
+
+  # gas-concentration data sampling for simulation of GC (gas chromatograph)
+  source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R")
+  # create new empty DF for sampled data
+  simGCsamplesDF <- data.frame()
+  # create logfile name
+  logfile_simGC_gas_conc_data_sampling <- paste0("sampling_gas_concentrations_for_gc_simulation_", measCamp, "_", currentDateTime, ".log")
 
   # TODO implement:
     # * add a new column to the measPeriodN2o DF:
@@ -284,6 +291,25 @@ if (simulateGCdata == "T"){
   # then: loop over valid mp and run sampling
   #   rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values
   # new DF: simGCsamplesDF
+
+
+  # loop over all mp with N2O data
+  for (row in 1:nrow(measPeriodMetaDataDF)){
+    # for now only with Aeris data
+    if (measPeriodMetaDataDF$is_n2o_aeris[row] == 1){
+      measID            <- measPeriodMetaDataDF$meas_ID[row]
+      rowsGasConcData   <- which(measPeriodN2oCorr$meas_ID == measID)
+      # returns data of this mp for the new DF
+      simGCsamplesDFmp  <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,],
+                                                                                measID,
+                                                                                mainDir,
+                                                                                logfile_simGC_gas_conc_data_sampling,
+                                                                                username)
+      simGCsamplesDF  <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp))
+    }
+  }
+  # sequence row number of simGCsamplesDF to 1:x
+  rownames(simGCsamplesDF) <- seq(1:nrow(simGCsamplesDF))
 }
 
 #########################################################################################
-- 
GitLab


From 13b3ba20a07182f80255a97d905dff6a05422be7 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Sat, 3 Feb 2024 23:22:50 +0100
Subject: [PATCH 5/9] add code to run data sampling for simGC

* add code to run the 'f_sample_gas_concentrations_for_gc_simulation_per_mp'
  * but the function itself is not yet doing what it is supposed to do
* minor modification in 'functions/correct_filter_n2o_conc_by_meas_period.R'
  * add a check for simGC: stop the script if 'simGCfilterCorrFunctions' lists no filter/correction function
  * add function 5 to the list of functions that need to run
    * because the detection of flatlines is needed
    * add new option 'flatLineInterpolationEnabled':
      * if flatlines are not interpolated (function 06)
        then the gas concentration values are not replaced
        by a constant, which is needed for simGC
* tested with and w/o simGC: works with measCamp 202307
  * the print functions in sec 4 and the actual flux calc were not run in this test
---
 .../additional_info/simulate_gc_n2o_data.txt  |   9 +-
 .../correct_filter_n2o_conc_by_meas_period.R  |  28 ++++-
 ..._concentrations_for_gc_simulation_per_mp.R |  12 ++
 preprocessing/main_script.R                   | 109 +++++++++++-------
 4 files changed, 112 insertions(+), 46 deletions(-)

diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt
index 8fe486c..fe93b58 100644
--- a/preprocessing/additional_info/simulate_gc_n2o_data.txt
+++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt
@@ -1,5 +1,12 @@
+# output directory for logfile
+if (username == "ntriches"){
+  simGCsampleDatainfoDir          <- paste0(outDir, "/sim_gc_sample_data_info")
+} else if (username == "jengel"){
+  simGCsampleDatainfoDir          <- paste0(outDir, "/sim_gc_sample_data_info")
+}
+
 # list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods'
-simGCfilterCorrFunctions          <- c(1, 2, 3, 4, 8, 9, 10, 11, 12)
+simGCfilterCorrFunctions          <- c(1, 2, 3, 4, 5, 8, 9, 10, 11, 12)
 
 # min length of the mp to be used for "GC simulation"
 simGCminLengthMPSec               <- 560  # [seconds] NOTE the 'data_filter_function_02' might remove some datapoints of each mp
diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
index 41dbac0..d2ea39e 100644
--- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
+++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
@@ -10,7 +10,7 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF
 
   print(paste0(">> start N2O data filter/correction for mp: ", measID))
   if (simulateGCdata == "T"){
-    print(">> simulateGCdata == TRUE, running only specific filter/correction functions!")
+    print(">>   simulateGCdata == TRUE, running only specific filter/correction functions!")
   }
 
   # sequence row numbers of measPeriodN2o_mID to 1:x
@@ -22,6 +22,14 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF
   # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions')
   source("additional_info/simulate_gc_n2o_data.txt")
 
+  # test if there is at least one filter function defined
+  if (simulateGCdata == "T"){
+    if (length(simGCfilterCorrFunctions) < 1){
+      print("ERROR no filter function defined was defined for GC simulation in simulate_gc_n2o_data.txt -> simGCfilterCorrFunctions")
+      stop("ERROR stopping the script")
+    }
+  }
+
   # absolute path to logfile
   logfile_n2o_DataCorrection <- paste0(warningDir, "/", logfile_n2o_DataCorrection)
 
@@ -77,11 +85,18 @@ f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF
   #               by default do not remove these measurements, and interpolate them with function 06
   #    if number of measurements exceeds maxProportionFlatLines: set mp invalid
   if (measPeriodMetaDataDF_mID$is_mp_valid > 0){
-    if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions & 6 %in% simGCfilterCorrFunctions)){
+    # the interpolation of flatlines might be disabled for simGC (GC simulation)
+    flatLineInterpolationEnabled <- "T"
+    if (simulateGCdata == "F" | (simulateGCdata == "T" & 5 %in% simGCfilterCorrFunctions)){
+      # in case interpolation of flatlines is disabled (with simGC), then the gas concentration values of flatlines are not replaced by 'constantToReplaceFlatLines_short'
+      if (simulateGCdata == "T" & ! 6 %in% simGCfilterCorrFunctions){
+        flatLineInterpolationEnabled <- "F"
+      }
       list_hlp <- data_filter_function_05(measPeriodMetaDataDF_mID, measPeriodN2o_mID,
                                           measID, logfile_n2o_DataCorrection,
                                           maxDiffFlatLineFromMedian_short, minFlatLineLengthSec,
-                                          removeFlatLines_short, constantToReplaceFlatLines_short, maxProportionFlatLines)
+                                          removeFlatLines_short, flatLineInterpolationEnabled,
+                                          constantToReplaceFlatLines_short, maxProportionFlatLines)
       measPeriodMetaDataDF_mID  <- as.data.frame(list_hlp[[1]])
       measPeriodN2o_mID         <- as.data.frame(list_hlp[[2]])
     }
@@ -345,6 +360,7 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID,
                                     maxDiffFlatLineFromMedian_short,
                                     minFlatLineLengthSec,
                                     removeFlatLines_short,
+                                    flatLineInterpolationEnabled,
                                     constantToReplaceFlatLines_short,
                                     maxProportionFlatLines){
   
@@ -426,8 +442,10 @@ data_filter_function_05 <- function(measPeriodMetaDataDF_mID,
     if (removeFlatLines_short == "T"){
       measPeriodN2o_mID <- measPeriodN2o_mID[-flatLineRowsRemove,]
     } else if(removeFlatLines_short == "F"){
-      # mark rows for 'data_filter_function_06' with a specific constant
-      measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove]      <- constantToReplaceFlatLines_short
+      # mark rows for "flatline interpolation" ('data_filter_function_06') with a specific constant, if interpolation is enabled
+      if (flatLineInterpolationEnabled == "T"){
+        measPeriodN2o_mID$n2oppm_aeris[flatLineRowsRemove]      <- constantToReplaceFlatLines_short
+      }
       # mark rows in an additional column for later analysis
       measPeriodN2o_mID$detected_flatline[flatLineRowsRemove] <- 1
     }
diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
index 2965478..e99ba25 100644
--- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
+++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
@@ -8,7 +8,19 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF
                                                                  logfile_gas_conc_sampling,
                                                                  username){
 
+  print(paste0(">> start data sampling for GC simulation for mp: ", measID))
 
+  # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions')
+  source("additional_info/simulate_gc_n2o_data.txt")
 
 
+
+
+
+
+
+  # NOTE: logfile with
+  # one line: measID, sample_time_sec, median_value, sample_time_is_flatline, value_of_median_calc_is_flatline
+
+  return(mpGasConcDataDF[1,])
 }
\ No newline at end of file
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index 72a6a08..0c7761f 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -262,45 +262,75 @@ if (simulateGCdata == "T"){
   print(">> start sampling data for GC simulation")
 
   # gas-concentration data sampling for simulation of GC (gas chromatograph)
+  #   this makes available the "additional_info/simulate_gc_n2o_data.txt"
   source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R")
+  
   # create new empty DF for sampled data
   simGCsamplesDF <- data.frame()
-  # create logfile name
+  
+  # create dir for logfile, if not present
+  if (!file.exists(simGCsampleDatainfoDir)) {
+    dir.create(simGCsampleDatainfoDir, showWarnings = TRUE, recursive = FALSE, mode = "0755")
+  }
+
+  # create logfile name and full path
   logfile_simGC_gas_conc_data_sampling <- paste0("sampling_gas_concentrations_for_gc_simulation_", measCamp, "_", currentDateTime, ".log")
+  logfile_simGC_gas_conc_data_sampling <- paste0(simGCsampleDatainfoDir, "/", logfile_simGC_gas_conc_data_sampling)
 
-  # TODO implement:
-    # * add a new column to the measPeriodN2o DF:
-    #   * values [0,1] 
-    #   * mark all data points that were detected as (short) flatline with 1
-    #   * add this to data_filter_function_05
-
-  # call function
-  #   use only valid mp
-  #   write logfile (each sample value & whether it is part of flatline or value(s) for median() were taken from flatline)
-  #   this function needs to check once that:
-    # length(simGCsampleTimeSec) > 0
-    # simGCsampleTimeSec[1] is larger than simGCtimespanBeforeAfterSampleSec
-    # simGCsampleTimeSec[last] is less than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec"
-    # simGCsampleTimeSec[1] is lowest value of simGCsampleTimeSec
-    # simGCsampleTimeSec[last] is largest value of simGCsampleTimeSec
-  # then: loop over valid mp and run sampling
-  #   rebuild the measPeriodN2oCorr DF but replace the values of the samples with the median() values
-  # new DF: simGCsamplesDF
+  # test if settings for simGC are valid
+  # number of samples must be larger than zero
+  if (length(simGCsampleTimeSec) < 1){
+    print("ERROR no samples were defined in simulate_gc_n2o_data.txt -> simGCsampleTimeSec")
+    stop("ERROR stopping the script")
+  }
+  # 'simGCsampleTimeSec[1]' is larger than simGCtimespanBeforeAfterSampleSec
+  if (simGCsampleTimeSec[1] < (simGCtimespanBeforeAfterSampleSec + 1)){
+    print(paste0("ERROR the first simGC sample is too close to the start of the mp, closer than possible for median() calculation: ", (simGCtimespanBeforeAfterSampleSec + 1)))
+    stop("ERROR stopping the script")
+  }
+  # 'simGCsampleTimeSec[last]' is less than "simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec"
+  if (simGCsampleTimeSec[length(simGCsampleTimeSec)] > (simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec - 1)){
+    print(paste0("ERROR the last simGC sample is too close to the end of the mp, closer than possible for median() calculation: ", (simGCminLengthMPSec - simGCtimespanBeforeAfterSampleSec - 1)))
+    stop("ERROR stopping the script")
+  }
+  # values of 'simGCsampleTimeSec' are increasing
+  if (length(simGCsampleTimeSec) > 1){
+    print("  simGC sampling is done per mp at [time in seconds]:")
+    for (i in 1:(length(simGCsampleTimeSec) - 1)){
+      if (simGCsampleTimeSec[i + 1] > simGCsampleTimeSec[i]){
+        print(paste0("    ", simGCsampleTimeSec[i]))
+      } else {
+        print(paste0("ERROR values of simGCsampleTimeSec are NOT increasing: ", simGCsampleTimeSec))
+        stop("ERROR stopping the script")
+      }
+    }
+    # print the last sample value
+    print(paste0("    ", simGCsampleTimeSec[length(simGCsampleTimeSec)]))
+  }
 
+  # write header in logfile
+  write("measID sample_time_sec median_value sample_time_is_flatline value_of_median_calc_is_flatline",
+        file = logfile_simGC_gas_conc_data_sampling, sep = " ")
 
   # loop over all mp with N2O data
   for (row in 1:nrow(measPeriodMetaDataDF)){
-    # for now only with Aeris data
-    if (measPeriodMetaDataDF$is_n2o_aeris[row] == 1){
+    # only valid mp and for now only with Aeris data
+    if (measPeriodMetaDataDF$is_mp_valid[row] == 1 & measPeriodMetaDataDF$is_n2o_aeris[row] == 1){
       measID            <- measPeriodMetaDataDF$meas_ID[row]
       rowsGasConcData   <- which(measPeriodN2oCorr$meas_ID == measID)
-      # returns data of this mp for the new DF
-      simGCsamplesDFmp  <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,],
-                                                                                measID,
-                                                                                mainDir,
-                                                                                logfile_simGC_gas_conc_data_sampling,
-                                                                                username)
-      simGCsamplesDF  <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp))
+      # only mp with a length of at least 'simGCminLengthMPSec'
+      dateTimeFirstMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[1]]
+      dateTimeLastMeasurementMP  <- measPeriodN2oCorr$datetime_datalogger[length(rowsGasConcData)]
+      timeLenghtMPSec          <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs")))
+      if (timeLenghtMPSec > (simGCminLengthMPSec - 1)){
+        # returns data of this mp for the new DF
+        simGCsamplesDFmp  <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,],
+                                                                                  measID,
+                                                                                  mainDir,
+                                                                                  logfile_simGC_gas_conc_data_sampling,
+                                                                                  username)
+        simGCsamplesDF  <- rbind(simGCsamplesDF, as.data.frame(simGCsamplesDFmp))
+      }
     }
   }
   # sequence row number of simGCsamplesDF to 1:x
@@ -311,11 +341,11 @@ if (simulateGCdata == "T"){
 print("")
 print(">> create DF with only valid measurement periods")
 
-# get the respective DF
+# get the gas concentration DF: either all data or the samples for the GC simulation
 if (simulateGCdata == "F"){
-  measPeriodN2oSection7inDF <- measPeriodN2oCorr
+  selValidMPinputDF <- measPeriodN2oCorr
 } else if (simulateGCdata == "T"){
-  measPeriodN2oSection7inDF <- simGCsamplesDF
+  selValidMPinputDF <- simGCsamplesDF
 }
 
 # collect all rows of invalid mp
@@ -328,7 +358,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
     # only invalid mp
     if (measPeriodMetaDataDF$is_mp_valid[row] == 0){
       measID            <- measPeriodMetaDataDF$meas_ID[row]
-      rowsInvalidMP     <- which(measPeriodN2oSection7inDF$meas_ID == measID)
+      rowsInvalidMP     <- which(selValidMPinputDF$meas_ID == measID)
       rowsAllInvalidMP  <- c(rowsAllInvalidMP, rowsInvalidMP)
       listAllInvalidMP  <- c(listAllInvalidMP, measID)
     }
@@ -337,24 +367,23 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
 
 # copy only valid mp to new DF
 if (length(rowsAllInvalidMP) > 0){
-  measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF[-rowsAllInvalidMP, ]
+  selValidMPoutputDF <- selValidMPinputDF[-rowsAllInvalidMP, ]
   # print number of mp from DF
-  print(paste0("   number of valid and invalid mp: ", length(unique(measPeriodN2oSection7inDF$meas_ID))))
-  print(paste0("   number of valid mp:             ", length(unique(measPeriodN2oSection7outDF$meas_ID))))
+  print(paste0("   number of valid and invalid mp: ", length(unique(selValidMPinputDF$meas_ID))))
+  print(paste0("   number of valid mp:             ", length(unique(selValidMPoutputDF$meas_ID))))
   print(paste0("   list of ", length(listAllInvalidMP)," invalid mp: "))
   print(listAllInvalidMP)
-  # sequence row number of measPeriodN2oSection7outDF to 1:x
-  rownames(measPeriodN2oSection7outDF) <- seq(1:nrow(measPeriodN2oSection7outDF))
+  # sequence row number of selValidMPoutputDF to 1:x
+  rownames(selValidMPoutputDF) <- seq(1:nrow(selValidMPoutputDF))
 } else{
-  measPeriodN2oSection7outDF <- measPeriodN2oSection7inDF
+  selValidMPoutputDF <- selValidMPinputDF
   print("   did not find any invalid mp")
 }
 
 # 8. Pass data DF to new DF used for calculations ----
 
 # create DF used with analysis and calculations
-ppN2OdataDF <- measPeriodN2oSection7outDF
-
+ppN2OdataDF <- selValidMPoutputDF
 
 # =============================================================================================== #
 # ------------------------------- End data pre-processing -----------------------------------------
-- 
GitLab


From db2ef83c1e94a2d22ad091cc7a25138959a6843c Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 7 Feb 2024 22:59:02 +0100
Subject: [PATCH 6/9] finished implementation of sampling gas concentration
 data for GC simulation

* tested:
  * the created DF looks good
  * the logfile contains the expected info
---
 ..._concentrations_for_gc_simulation_per_mp.R | 58 +++++++++++++++++--
 preprocessing/main_script.R                   |  2 +-
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
index e99ba25..7a76543 100644
--- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
+++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
@@ -13,14 +13,60 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF
   # load the info file for simulateGCdata (in particular 'simGCfilterCorrFunctions')
   source("additional_info/simulate_gc_n2o_data.txt")
 
+  # sequence row numbers of mpGasConcDataDF to 1:x
+  rownames(mpGasConcDataDF) <- seq(1:nrow(mpGasConcDataDF))
 
+  # get date & time of first measurement
+  dateTimeFirstMeasurement <- mpGasConcDataDF$datetime_datalogger[1]
+  # row numbers that contain the samples
+  sampleRows <- rep(NA, length(simGCsampleTimeSec))
 
+  # loop over sample times
+  for (i in 1:length(simGCsampleTimeSec)){
+    sampleTimeSec         <- simGCsampleTimeSec[i]
+    # get date/time of sample
+    absoluteSampleTime    <- dateTimeFirstMeasurement + as.difftime(sampleTimeSec, units="secs")
+    # get rows of measurements used for calc the median
+    measurementsStartTime <- absoluteSampleTime - as.difftime((simGCtimespanBeforeAfterSampleSec + 1), units="secs")
+    measurementsLastTime  <- absoluteSampleTime + as.difftime((simGCtimespanBeforeAfterSampleSec + 1), units="secs")
+    measurementRows       <- which(mpGasConcDataDF$datetime_datalogger > measurementsStartTime &
+                                   mpGasConcDataDF$datetime_datalogger < measurementsLastTime)
+    # get row of this sample
+    sampleRowNumber    <- which(mpGasConcDataDF$datetime_datalogger == absoluteSampleTime)
+    # if there is no measurement at this exact time, use the middle row of the 'measurementRows'
+    if (length(sampleRowNumber) < 1){
+      sampleRowNumber <- floor(median(measurementRows))
+    }
+    # save row numer
+    sampleRows[i] <- sampleRowNumber
+    # calc median of measurements of N2O gas concentration
+    sampleGasConcMedian <- median(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE)
+    # add new value to DF
+    mpGasConcDataDF$n2oppm_aeris[sampleRowNumber] <- sampleGasConcMedian
+    # calc standard deviation of measurements of N2O gas concentration
+    sampleGasConcSD     <- sd(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE)
+    # test if one of the samples is part of a short flatline
+    sample_time_is_flatline             <- "F"
+    any_meas_of_median_calc_is_flatline <- "F"
+    if (mpGasConcDataDF$detected_flatline[sampleRowNumber] > 0){
+      sample_time_is_flatline             <- "T"
+    }
+    if (sum(mpGasConcDataDF$detected_flatline[measurementRows]) > 0){
+      any_meas_of_median_calc_is_flatline <- "T"
+    }
+    # write one line to logfile with columns:
+    # measID sample_time_sec median_value median_value_sd sample_time_is_flatline any_meas_of_median_calc_is_flatline
+    write(paste0(measID, " ",
+                 sampleTimeSec, " ",
+                 sampleGasConcMedian, " ",
+                 sampleGasConcSD, " ",
+                 sample_time_is_flatline, " ",
+                 any_meas_of_median_calc_is_flatline),
+                 file = logfile_gas_conc_sampling, sep = " ", append = TRUE)
+  }
 
+  # return only the sample rows
+  mpGasConcDataDF <- mpGasConcDataDF[sampleRows,]
 
-
-
-  # NOTE: logfile with
-  # one line: measID, sample_time_sec, median_value, sample_time_is_flatline, value_of_median_calc_is_flatline
-
-  return(mpGasConcDataDF[1,])
+  return(mpGasConcDataDF)
 }
\ No newline at end of file
diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index 0c7761f..9b8d627 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -309,7 +309,7 @@ if (simulateGCdata == "T"){
   }
 
   # write header in logfile
-  write("measID sample_time_sec median_value sample_time_is_flatline value_of_median_calc_is_flatline",
+  write("measID sample_time_sec median_value median_value_sd sample_time_is_flatline any_meas_of_median_calc_is_flatline",
         file = logfile_simGC_gas_conc_data_sampling, sep = " ")
 
   # loop over all mp with N2O data
-- 
GitLab


From 9e40e3e179423b66698d2d8ef3fbd74bccdbe11b Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 7 Feb 2024 23:10:45 +0100
Subject: [PATCH 7/9] minor docu improvements

---
 .../sample_gas_concentrations_for_gc_simulation_per_mp.R      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
index 7a76543..98e3e42 100644
--- a/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
+++ b/preprocessing/functions/sample_gas_concentrations_for_gc_simulation_per_mp.R
@@ -33,11 +33,11 @@ f_sample_gas_concentrations_for_gc_simulation_per_mp <- function(mpGasConcDataDF
                                    mpGasConcDataDF$datetime_datalogger < measurementsLastTime)
     # get row of this sample
     sampleRowNumber    <- which(mpGasConcDataDF$datetime_datalogger == absoluteSampleTime)
-    # if there is no measurement at this exact time, use the middle row of the 'measurementRows'
+    # if there is no measurement at this exact time, use the center-row of the 'measurementRows'
     if (length(sampleRowNumber) < 1){
       sampleRowNumber <- floor(median(measurementRows))
     }
-    # save row numer
+    # save row number
     sampleRows[i] <- sampleRowNumber
     # calc median of measurements of N2O gas concentration
     sampleGasConcMedian <- median(mpGasConcDataDF$n2oppm_aeris[measurementRows], na.rm = TRUE)
-- 
GitLab


From a898c97e7aa9dfe455d5494f9718b957c04e2814 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 14 Feb 2024 23:41:24 +0100
Subject: [PATCH 8/9] bugfix in calculation of mp length for simGC function

---
 preprocessing/main_script.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing/main_script.R b/preprocessing/main_script.R
index 2b76f45..7785002 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_script.R
@@ -322,8 +322,8 @@ if (simulateGCdata == "T"){
       rowsGasConcData   <- which(measPeriodN2oCorr$meas_ID == measID)
       # only mp with a length of at least 'simGCminLengthMPSec'
       dateTimeFirstMeasurementMP <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[1]]
-      dateTimeLastMeasurementMP  <- measPeriodN2oCorr$datetime_datalogger[length(rowsGasConcData)]
-      timeLenghtMPSec          <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs")))
+      dateTimeLastMeasurementMP  <- measPeriodN2oCorr$datetime_datalogger[rowsGasConcData[length(rowsGasConcData)]]
+      timeLenghtMPSec            <- as.numeric(abs(difftime(dateTimeFirstMeasurementMP, dateTimeLastMeasurementMP, units = "secs")))
       if (timeLenghtMPSec > (simGCminLengthMPSec - 1)){
         # returns data of this mp for the new DF
         simGCsamplesDFmp  <- f_sample_gas_concentrations_for_gc_simulation_per_mp(measPeriodN2oCorr[rowsGasConcData,],
-- 
GitLab


From c1716031227a96ca56659136f33ea5c408fdec21 Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Thu, 15 Feb 2024 10:37:23 +0200
Subject: [PATCH 9/9] test GA-PGA comparison and change values in txt file

* works well for Sept and July, apparently now for May, too :)
---
 preprocessing/additional_info/simulate_gc_n2o_data.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt
index fe93b58..3d1eb18 100644
--- a/preprocessing/additional_info/simulate_gc_n2o_data.txt
+++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt
@@ -13,7 +13,7 @@ simGCminLengthMPSec               <- 560  # [seconds] NOTE the 'data_filter_func
 
 # time in seconds the GC samples are taken (per mp; from mp start)
 # this also defines the number of samples
-simGCsampleTimeSec                <- c(100, 200, 300, 400, 500)  # time [s] after mp start for taking samples
+simGCsampleTimeSec                <- c(60, 180, 300, 420, 550)  # time [s] after mp start for taking samples
 
 # time in seconds before and after a sampling point to calculate (median) sampling value
 simGCtimespanBeforeAfterSampleSec <- 5   # timespan before and after a sample used for calc of sample value
-- 
GitLab