From d8ebb98ae0b94397f971f68753340aa131afc7a1 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Mon, 19 Feb 2024 15:30:36 +0100
Subject: [PATCH 01/24] multiple changes in code, new files, splitting the main
 script, new run_all_main.R

* the pre-processing code needed some re-work: it is now called inside a
  function, so the previous reliance on the "global environment" (e.g. writing
  to it with `<<-`) does not work anymore
* testing:
  * data preprocessing runs for 202307
  * with the two modes: normal & simGC
* next steps:
  * modify the measPeriod ID
  * run pre-processing for all 2023 measurement campaigns (measCamp)
---
 preprocessing/functions/plot_ghg_conc_2023.R  |  42 +--
 preprocessing/functions/read_raw_data_2023.R  |  10 +-
 .../functions/sort_n2o_conc_by_meas_period.R  |   9 +-
 .../sort_n2o_conc_by_meas_period_2023.R       |  26 +-
 ...ain_script.R => main_data_preprocessing.R} | 263 +++++-------------
 preprocessing/main_flux_analysis.R            |  27 ++
 preprocessing/main_flux_calculation.R         |  32 +++
 preprocessing/run_all_main.R                  | 148 ++++++++++
 preprocessing/run_main_script.sh              |  27 --
 9 files changed, 325 insertions(+), 259 deletions(-)
 rename preprocessing/{main_script.R => main_data_preprocessing.R} (62%)
 create mode 100644 preprocessing/main_flux_analysis.R
 create mode 100644 preprocessing/main_flux_calculation.R
 create mode 100644 preprocessing/run_all_main.R
 delete mode 100755 preprocessing/run_main_script.sh

diff --git a/preprocessing/functions/plot_ghg_conc_2023.R b/preprocessing/functions/plot_ghg_conc_2023.R
index 12fa6e0..73d764c 100644
--- a/preprocessing/functions/plot_ghg_conc_2023.R
+++ b/preprocessing/functions/plot_ghg_conc_2023.R
@@ -112,7 +112,6 @@ for (i in unique(meas_ID)){
 ################################################################################
 ################# Plots for N2O ################################################
 ################################################################################
-if (process_n2o == "T"){
 
 # # enable plot comparison (copy-paste from 20230414_simple_habitat_comparison)
 # # code needs to be cleaned! 
@@ -234,22 +233,32 @@ if (process_n2o == "T"){
 
 
 
-# non-modified measurement periods (5 min)
-meas_ID <- measPeriodN2o$meas_ID
+# plot non-modified measurement periods (5 min)
+f_plot_ghg_conc_2023_n2o_non_mod_mp <- function(process_n2o,
+                                                figsGasConcDir,
+                                                expYear,
+                                                measPeriodN2o){
 
-for (i in unique(meas_ID)){
-  fileID <- i
-  d <- subset(measPeriodN2o, meas_ID == i)
-  png(filename=paste0(figsGasConcDir, "/", expYear, "/n2o_mp_", fileID, ".png"), 
-      height=4, width=8, pointsize=8, bg="white", units="in", res=200)
-  par(mfrow = c(1,2))
-  plot(d$datetime_datalogger, d$n2oppm_aeris)
-  title(main = i)
-  # hist(d$n2oppm_aeris, breaks = 15)
-  boxplot(d$n2oppm_aeris)
-  #plot(d$airTdegC)
-  par(mfrow = c(1,1))
-  dev.off()
+  print(">> plot GHG concentrations N2O")
+
+  if (process_n2o == "T"){
+    meas_ID <- measPeriodN2o$meas_ID
+
+    for (i in unique(meas_ID)){
+      fileID <- i
+      d <- subset(measPeriodN2o, meas_ID == i)
+      png(filename=paste0(figsGasConcDir, "/", expYear, "/n2o_mp_", fileID, ".png"), 
+          height=4, width=8, pointsize=8, bg="white", units="in", res=200)
+      par(mfrow = c(1,2))
+      plot(d$datetime_datalogger, d$n2oppm_aeris)
+      title(main = i)
+      # hist(d$n2oppm_aeris, breaks = 15)
+      boxplot(d$n2oppm_aeris)
+      #plot(d$airTdegC)
+      par(mfrow = c(1,1))
+      dev.off()
+    }
+  }
 }
 
 # # modified measurement periods: -20 seconds in the start, -10 seconds in the end
@@ -271,4 +280,3 @@ for (i in unique(meas_ID)){
 #   dev.off()
 # }
 
-} # if (process_n2o == "T")
diff --git a/preprocessing/functions/read_raw_data_2023.R b/preprocessing/functions/read_raw_data_2023.R
index 9417001..0ce45f1 100644
--- a/preprocessing/functions/read_raw_data_2023.R
+++ b/preprocessing/functions/read_raw_data_2023.R
@@ -46,7 +46,7 @@ get_datalogger_raw_data  <- function(mainDir, dataDir, measCamp){
       ) 
     
     setwd(mainDir)
-    print("end get_datalogger_raw_data")
+    print("  end get_datalogger_raw_data")
     return(datalogger_data_polished)
     
   } else {
@@ -69,7 +69,7 @@ get_datalogger_raw_data  <- function(mainDir, dataDir, measCamp){
       )   
     
   setwd(mainDir)
-  print("end get_datalogger_raw_data")
+  print("  end get_datalogger_raw_data")
   return(datalogger_data_polished)
   }  
 }    
@@ -139,7 +139,7 @@ get_aeris_raw_data <- function(mainDir, dataDir, measCamp){
   #                                             "%m/%d/%Y %H:%M:%OS %Z", tz="UTC")
   
   setwd(mainDir)
-  print("end get_aeris_raw_data")
+  print("  end get_aeris_raw_data")
   return(aeris_data_polished)
 }
 
@@ -193,7 +193,7 @@ get_field_mp_meta_data_DF <- function(mainDir, dataDir, measCamp){
       select(c("datetime_start","datetime_end"), everything())
     
     setwd(mainDir)
-    print("end get_field_mp_meta_data_DF")
+    print("  end get_field_mp_meta_data_DF")
     return(field_mp_meta_data_DF_polished)
   }  else {
     
@@ -238,7 +238,7 @@ get_field_mp_meta_data_DF <- function(mainDir, dataDir, measCamp){
       select(c("datetime_start","datetime_end"), everything())
     
     setwd(mainDir)
-    print("end get_field_mp_meta_data_DF")
+    print("  end get_field_mp_meta_data_DF")
     return(field_mp_meta_data_DF_polished) 
     
   }
diff --git a/preprocessing/functions/sort_n2o_conc_by_meas_period.R b/preprocessing/functions/sort_n2o_conc_by_meas_period.R
index ac1aa1a..f7373c2 100644
--- a/preprocessing/functions/sort_n2o_conc_by_meas_period.R
+++ b/preprocessing/functions/sort_n2o_conc_by_meas_period.R
@@ -7,10 +7,13 @@
 # one chamber closure time is called one measurement period (short: mp)  
 
 
-create_n2o_data_sorted_by_measurements  <- function(mainDir, dataDir, expYear){
+create_n2o_data_sorted_by_measurements  <- function(mainDir, dataDir, expYear,
+                                                    measPeriodMetaDataDF,
+                                                    ch4_co2_n2o_conc_st_sm_par_data,
+                                                    gasID){
   
   print("start create_n2o_conc_sorted_by_measurements")
-  
+
   # create empty (new) data frame for collection of all measurement periods
   allMeasPeriodDF <- data.frame()
   
@@ -85,7 +88,7 @@ create_n2o_data_sorted_by_measurements  <- function(mainDir, dataDir, expYear){
     # create vector with same amount of rows as measured gas concentrations
     # and all rows containing the mp ID & gas ID for the measurement period
     vec_meas_ID <- rep(measPeriodMetaDataDF$meas_ID[i_mp], nrow(meas_period_DF))
-    vec_gas_ID  <- rep(gas_ID$n2o_aeris, nrow(meas_period_DF))
+    vec_gas_ID  <- rep(gasID, nrow(meas_period_DF))
     # add new columns to meas_period_DF
     meas_period_DF["meas_ID"] <- vec_meas_ID
     meas_period_DF["gas_ID"]  <- vec_gas_ID
diff --git a/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R b/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R
index 72b5ac8..9a26e85 100644
--- a/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R
+++ b/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R
@@ -7,10 +7,13 @@
 # one chamber closure time is called one measurement period (short: mp)   
 
 
-create_n2o_conc_sorted_by_measurements  <- function(mainDir, dataDir, expYear){
-  
-  print("start create_n2o_conc_sorted_by_mp")
+f_create_n2o_conc_sorted_by_measurements <- function(field_mp_meta_data_DF,
+                                                     measPeriodMetaDataDF,
+                                                     ch4_co2_n2o_conc_st_sm_par_data,
+                                                     gasID){
   
+  print("start f_create_n2o_conc_sorted_by_measurements")
+
   # create empty (new) data frame for collection of all measurement periods
   allMeasPeriodDF <- data.frame()
   
@@ -94,20 +97,19 @@ create_n2o_conc_sorted_by_measurements  <- function(mainDir, dataDir, expYear){
     # get the ch4 concentrations of the particular measurement period 
     #   = start at datetime_start, end at datetime_end + all values in between
     meas_period_DF <- fluxMetaLicor[first_row:last_row, ] 
-    
+
     # add two new columns to meas_period_DF: ID of measurement period and ID of gas
     # create vector with same amount of rows as measured gas concentrations
     # and all rows containing the mp ID & gas ID for the measurement period
     vec_meas_ID <- rep(measPeriodMetaDataDF$meas_ID[i_mp], nrow(meas_period_DF))
-    vec_gas_ID  <- rep(gas_ID$is_n2o_aeris, nrow(meas_period_DF))
+    vec_gas_ID  <- rep(gasID, nrow(meas_period_DF))
     # add new columns to meas_period_DF
     meas_period_DF["meas_ID"] <- vec_meas_ID
     meas_period_DF["gas_ID"]  <- vec_gas_ID
-    
+
     # set gas_ID in measPeriodMetaDataDF 
-    measPeriodMetaDataDF$is_n2o_aeris[i_mp] <<- 1  
-    # (1 == TRUE, no gas_ID); "<<-" means: write to global object, e.g., DF, within function
-    
+    measPeriodMetaDataDF$is_n2o_aeris[i_mp] <- 1
+
     # screen output
     print(paste0("mp meas_ID & gas_ID: ", vec_meas_ID[1], " ", vec_gas_ID[1],
                  " | median of N2O concentration: ", median(meas_period_DF$n2oppm_aeris)))
@@ -117,8 +119,10 @@ create_n2o_conc_sorted_by_measurements  <- function(mainDir, dataDir, expYear){
     allMeasPeriodDF           <- rbind(allMeasPeriodDF, meas_period_DF) 
   }
   
-  print("end create_n2o_conc_sorted_by_mp")
-  return(allMeasPeriodDF)
+  print("  end create_n2o_conc_sorted_by_mp")
+
+  # return metaData and data DF
+  return(list(measPeriodMetaDataDF, allMeasPeriodDF))
 }
 
 
diff --git a/preprocessing/main_script.R b/preprocessing/main_data_preprocessing.R
similarity index 62%
rename from preprocessing/main_script.R
rename to preprocessing/main_data_preprocessing.R
index 7785002..da54b80 100644
--- a/preprocessing/main_script.R
+++ b/preprocessing/main_data_preprocessing.R
@@ -1,106 +1,37 @@
-# Main script for processing data
-# Use: This script is the main script used to automatically process the data 
-# gathered in the field with a data logger and portable gas analysers (GA's)
+#
+# Main script for pre-processing data
+# Use: This script is the main script used to automatically pre-process the data 
+#      gathered in the field with a data logger and portable gas analysers (GA's)
+#
 # 2022: the used GA's are the LICOR Li-7810 for CH2/CH4/H2O concentrations and the AERIS MIRA Ultra N2O/CO2/H2O for N2O conc
+# 2023:
+# 2024:
 
-# Aim: Automatic processing + avoid manual (pre)processing of data in multiple spreadsheets
 
 # =============================================================================================== #
-# script settings ----
+# ------------------------------- Data pre-processing -------------------------------------------
 # =============================================================================================== #
 
-# experiment yaer [YYYY] and month [MM]
-# get arguments from 'preprocessing/run_main_script.sh'
-args <- commandArgs(TRUE)
-if (length(args) > 0){
-  # if arguments are provided (by running './run_main_script.sh')
-  expYear   <-args[1]
-  expMonth  <-args[2]
-} else {
-  # if no arguments are provided (just running this main_script.R)
-  expYear  <- "2023"
-  expMonth <- "07"
-}
-# measurement campaign [YYYYMM]
-measCamp <- paste0(expYear, expMonth)
-# gas type to process [T,F]
-process_co2 <- "F"
-process_ch4 <- "F"
-process_n2o <- "T"
-# how to calculate gas fluxes
-calc_fluxes_with_gasfluxes_package      <- "F"
-calc_fluxes_with_goflux_package <- "T"
-
-# process aeris raw data (depending on measurement campaign) [T,F]
-# FALSE by default
-process_aeris_raw_data <- "F"
-# set to TRUE for individual measurement campaigns
-if (measCamp == "202307") process_aeris_raw_data <- "T"    ## read raw data from Aeris output file
-
-# plot figures [T,F]
-create_plots_ghg_conc <- "T"
-
-# use the PGA data to simulate GC data, and run only specific filter/correction functions
-simulateGCdata <- "F"   # [F,T]
-
-
-# =============================================================================================== #
-# init & get main variables & settings, set directories, change into main dir, load R libraries ----
-# =============================================================================================== #
-
-# set dirs
-# main working directory (version control with gitlab)
-# data dir (files with field data)
-# differentiate between users
-username <- Sys.getenv("USER")
-if (username == "ntriches"){
-  # Nathalie
-  mainDir         <- "/home/ntriches/git_repo/data-analysis/preprocessing"
-  dataDir         <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data"
-  figsGasConcDir  <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing"
-  outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
-} else if (username == "jengel"){
-  # Jan
-  mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
-  dataDir         <- "/home/jengel/nathalie/20231210-field-data"
-  figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
-  outDir          <- "/home/jengel/nathalie/20231210-script-output"
-} else {
-  # error catching
-  stop("ERROR no valid username")
-}
-
-# get date & time (e.g.: '20231211_195800')
-currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S")
-
-# change into main dir
-setwd(mainDir)
+f_main_data_preprocessing <- function(username,
+                                      process_co2,
+                                      process_ch4,
+                                      process_n2o,
+                                      create_plots_ghg_conc,
+                                      simulateGCdata,
+                                      mainDir,
+                                      dataDir,
+                                      figsGasConcDir,
+                                      outDir,
+                                      currentDateTime,
+                                      measCamp,
+                                      expYear){
+
+# print info
+print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp, expYear))
+print("")
 
-# get additional data needed for flux calculation
+# get the gas IDs
 source("additional_info/gas_id.txt")
-source("additional_info/fixed_values.txt")
-
-# load libraries
-# optional install (if not already installed) and load dplyr package
-# if(!require(dplyr)){install.packages('dplyr')}
-library(dplyr)
-#if(!require(tidyverse)){install.packages('tidyverse')}
-library(tidyverse)
-
-if (calc_fluxes_with_gasfluxes_package == "T"){
-  library(gasfluxes)
-}
-if (calc_fluxes_with_goflux_package == "T"){
-  # TODO  move from functions/calculate_fluxes_with_gofluxyourself_package.R to here
-}
-
-# basic R settings
-options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits
-
-
-# =============================================================================================== #
-# ------------------------------- Start data pre-processing -------------------------------------------
-# =============================================================================================== #
 
 # 1. Import all raw data (measurements and meta-data) ----
 if (expYear == "2022"){
@@ -112,7 +43,7 @@ if (expYear == "2022"){
   # get measurement data
   #   ch4 = methane, co2 = carbon dioxide, conc = concentration, 
   #   st = soil temperature, sm = soil moisture, n2o = nitrous oxide
-  ch4_co2_n2o_conc_st_sm_par_data <- get_datalogger_raw_data(mainDir, dataDir, measCamp) 
+  ch4_co2_n2o_conc_st_sm_par_data <- get_datalogger_raw_data(mainDir, dataDir, measCamp)
   if (process_aeris_raw_data == "T"){
     co2_n2o_conc_data             <- get_aeris_raw_data(mainDir, dataDir, measCamp)
   }
@@ -125,30 +56,35 @@ if (expYear == "2022"){
 source("functions/create_meas_period_metadata_df.R")
 measPeriodMetaDataDF        <- create_measPeriodMetaDataDF(expYear, field_mp_meta_data_DF)
 
-
 # 3. Sort measurement data per measurement period and gas ----
 
 #--- 2022 -----------------------------------------------------------------------
 if (expYear == "2022"){
-  if (process_co2 == "T"){
-    source("functions/sort_co2_conc_by_meas_period.R")
-    # create DF with licor CO2 measured data, sorted by meas_ID
-    # uses: measPeriodMetaDataDF
-    measPeriodCo2               <- create_co2_conc_sorted_by_mp(mainDir, dataDir, expYear)
-  }
-
-  if (process_ch4 == "T"){
-    source("functions/sort_ch4_conc_by_meas_period.R")
-    # create DF with licor CH4 measured data, sorted by meas_ID
-    # uses: measPeriodMetaDataDF
-    measPeriodCh4               <- create_ch4_conc_sorted_by_mp(mainDir, dataDir, expYear)
-  }
+  # if (process_co2 == "T"){
+  #   source("functions/sort_co2_conc_by_meas_period.R")
+  #   # create DF with licor CO2 measured data, sorted by meas_ID
+  #   # uses: measPeriodMetaDataDF
+  #   measPeriodCo2               <- create_co2_conc_sorted_by_mp(mainDir, dataDir, expYear,
+  #                                                               measPeriodMetaDataDF,
+  #                                                               ch4_co2_n2o_conc_st_sm_par_data)
+  # }
+
+  # if (process_ch4 == "T"){
+  #   source("functions/sort_ch4_conc_by_meas_period.R")
+  #   # create DF with licor CH4 measured data, sorted by meas_ID
+  #   # uses: measPeriodMetaDataDF
+  #   measPeriodCh4               <- create_ch4_conc_sorted_by_mp(mainDir, dataDir, expYear,
+  #                                                               measPeriodMetaDataDF,
+  #                                                               ch4_co2_n2o_conc_st_sm_par_data)
+  # }
 
   if (process_n2o == "T"){
     source("functions/sort_n2o_conc_by_meas_period.R")
     # create DF with Aeris N2O measured data, sorted by meas_ID
-    # uses: measPeriodMetaDataDF
-    measPeriodN2o               <- create_n2o_data_sorted_by_measurements(mainDir, dataDir, expYear)
+    measPeriodN2o               <- create_n2o_data_sorted_by_measurements(mainDir, dataDir, expYear,
+                                                                          measPeriodMetaDataDF,
+                                                                          ch4_co2_n2o_conc_st_sm_par_data,
+                                                                          gas_ID$n2o_aeris)
   }
 #--- 2023 -----------------------------------------------------------------------
 } else if (expYear == "2023"){
@@ -169,8 +105,15 @@ if (expYear == "2022"){
   if (process_n2o == "T"){
     source("functions/sort_n2o_conc_by_meas_period_2023.R")
     # create DF with Aeris N2O measured data, sorted by meas_ID
-    # uses: measPeriodMetaDataDF
-    measPeriodN2o               <- create_n2o_conc_sorted_by_measurements(mainDir, dataDir, expYear)
+    list_hlp <- f_create_n2o_conc_sorted_by_measurements(field_mp_meta_data_DF,
+                                                         measPeriodMetaDataDF,
+                                                         ch4_co2_n2o_conc_st_sm_par_data,
+                                                         gas_ID$n2o_aeris)
+
+    # extract the two DF from list
+    measPeriodN2o         <- data.frame()
+    measPeriodMetaDataDF  <- as.data.frame(list_hlp[[1]])
+    measPeriodN2o         <- as.data.frame(list_hlp[[2]])
   }
 }
 
@@ -186,6 +129,10 @@ if (create_plots_ghg_conc == "T"){
   #--- 2023 -----------------------------------------------------------------------
   } else if (expYear == "2023"){
     source("functions/plot_ghg_conc_2023.R")
+    f_plot_ghg_conc_2023_n2o_non_mod_mp(process_n2o,
+                                        figsGasConcDir,
+                                        expYear,
+                                        measPeriodN2o)
   }
 }
 
@@ -264,9 +211,10 @@ if (simulateGCdata == "T"){
   print(">> start sampling data for GC simulation")
 
   # gas-concentration data sampling for simulation of GC (gas chromatograph)
-  #   this makes available the "additional_info/simulate_gc_n2o_data.txt"
   source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R")
-  
+  # load the info file for simulateGCdata
+  source("additional_info/simulate_gc_n2o_data.txt")
+
   # create new empty DF for sampled data
   simGCsamplesDF <- data.frame()
   
@@ -382,85 +330,8 @@ if (length(rowsAllInvalidMP) > 0){
   print("   did not find any invalid mp")
 }
 
-# 8. Pass data DF to new DF used for calculations ----
-
-# create DF used with analysis and calculations
-ppN2OdataDF <- selValidMPoutputDF
-
-# =============================================================================================== #
-# ------------------------------- End data pre-processing -----------------------------------------
-# =============================================================================================== #
-
-
-# =============================================================================================== #
-# ------------------------------- Flux calculation ----------------------------------------------
-# =============================================================================================== #
-
-# 9. Create input for goFlux package ----
-source("functions/create_input_for_goflux_package.R")
-# output: goflux_input_n2o_df + csv file 
-# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time))
-
-# 10. Calculate fluxes with goFlux package ----
-if (calc_fluxes_with_goflux_package == "T"){
-  print("")
-  print(">> prepare DF for and calculate fluxes with the goflux package")
-  
-  # calc fluxes
-  # simply source the file and run code (not a function yet)
-  # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir'
-  source("functions/calculate_fluxes_with_goflux_package.R")
-}
-# to do's: add date + measCamp to output pdf (line 35) and .csv file (same as above 8.)
-# output should go to /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/202305/
-# ofc, there are folders 202307 and 202309, too. 
-# starts from line 33 onwards. Is currently stored in the wrong place, not sure how to change it
-
-
-# =============================================================================================== #
-# ------------------------------- Data analysis ----------------------------------------------
-# =============================================================================================== #
-
-# 11. Combine all data and meta data----
-# output: one dataframe that can be used for data analysis and visualisation
-# source("/functions/combine_all_data_and_meta_data.R")
-# to do: probably adjust code so it can run automatically
-# change output: add measCamp and date (line 124-134)
-# output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/
-
-# 12. Combine all measCamps ---- 
-
-# 13. Visualise data ----
-# does not need to run automatically
-#source("/functions/combine_all_meas_campaigns.R")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# important!!! all is in UTC but needs to be in Sweden time in the end
 
+# return metaData and data DF
+return(list(measPeriodMetaDataDF, selValidMPoutputDF))
 
-# Bsp von how to modify data function <- do not write in main script
-# but as separate function
-# source("modify_data.r")
-# licor_data_mod <- modify_licor_data(licor_data)
-# 
-# 
-# ## "modify_data.r"
-# modify_licor_data <- function(licor_data){
-# 
-#   licor_data_mod <- licor_data +1
-# 
-#   return(licor_data_mod)
-# 
-# }
+} # end of f_main_data_preprocessing()
diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R
new file mode 100644
index 0000000..04da2f0
--- /dev/null
+++ b/preprocessing/main_flux_analysis.R
@@ -0,0 +1,27 @@
+#
+# Main script for analysis of fluxes calculated with 'main_flux_calculation.R'
+#
+#
+
+
+# =============================================================================================== #
+# ------------------------------- Flux analysis -------------------------------------------------
+# =============================================================================================== #
+
+# 1. Combine all data and meta data----
+# output: one dataframe that can be used for data analysis and visualisation
+# source("/functions/combine_all_data_and_meta_data.R")
+# to do: probably adjust code so it can run automatically
+# change output: add measCamp and date (line 124-134)
+# output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/
+
+# 2. Combine all measCamps ---- 
+
+# 3. Visualise data ----
+# does not need to run automatically
+#source("/functions/combine_all_meas_campaigns.R")
+
+
+# TODO ?
+#   important!!! all is in UTC but needs to be in Sweden time in the end
+
diff --git a/preprocessing/main_flux_calculation.R b/preprocessing/main_flux_calculation.R
new file mode 100644
index 0000000..6562668
--- /dev/null
+++ b/preprocessing/main_flux_calculation.R
@@ -0,0 +1,32 @@
+#
+# Main script for calculating the gas fluxes from the data prepared with 'main_preprocessing.R'
+#
+#
+
+
+# =============================================================================================== #
+# ------------------------------- Flux calculation ----------------------------------------------
+# =============================================================================================== #
+
+# 1. Create input for goFlux package ----
+source("functions/create_input_for_goflux_package.R")
+# output: goflux_input_n2o_df + csv file 
+# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time))
+
+# 2. Calculate fluxes with goFlux package ----
+if (calc_fluxes_with_goflux_package == "T"){
+  print("")
+  print(">> prepare DF for and calculate fluxes with the goflux package")
+  
+  # calc fluxes
+  # simply source the file and run code (not a function yet)
+  # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir'
+  source("functions/calculate_fluxes_with_goflux_package.R")
+}
+
+# TODO:
+#    * add date + measCamp to output pdf (line 35) and .csv file (same as above 8.)
+#    * output should go to /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/202305/
+#    * ofc, there are folders 202307 and 202309, too. 
+#    * starts from line 33 onwards. Is currently stored in the wrong place, not sure how to change it
+
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
new file mode 100644
index 0000000..3d3901e
--- /dev/null
+++ b/preprocessing/run_all_main.R
@@ -0,0 +1,148 @@
+#
+# run pre-processing of data and flux calculation for selected measurement campaings
+#
+#
+# status: works with N2O and 2023 measCamp
+#
+
+
+# =============================================================================================== #
+# script settings ----
+# =============================================================================================== #
+
+# list of measurement campaigns to run [YYYYMM]
+measCampList <- c("202305", "202307", "202309")
+
+# gas type to process [T,F]
+process_co2 <- "F"
+process_ch4 <- "F"
+process_n2o <- "T"
+# how to calculate gas fluxes
+calc_fluxes_with_gasfluxes_package  <- "F"
+calc_fluxes_with_goflux_package     <- "T"
+
+# process aeris raw data (depending on measurement campaign) [T,F]
+# FALSE by default
+# might be set to TRUE for individual measurement campaigns (see below)   TODO  this DF is not used anyway!
+process_aeris_raw_data <- "F"
+
+# plot figures [T,F]
+create_plots_ghg_conc  <- "T"
+
+# use the PGA data to simulate GC data, and run only specific filter/correction functions
+simulateGCdata         <- "F"   # [F,T]
+
+
+# =============================================================================================== #
+# init & get main variables & settings, set directories, change into main dir, load R libraries ----
+# =============================================================================================== #
+
+# set dirs
+# main working directory (version control with gitlab)
+# data dir (files with field data)
+# differentiate between users
+username <- Sys.getenv("USER")
+if (username == "ntriches"){
+  # Nathalie
+  mainDir         <- "/home/ntriches/git_repo/data-analysis/preprocessing"
+  dataDir         <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data"
+  figsGasConcDir  <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing"
+  outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
+} else if (username == "jengel"){
+  # Jan
+  mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
+  dataDir         <- "/home/jengel/nathalie/20231210-field-data"
+  figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
+  outDir          <- "/home/jengel/nathalie/20231210-script-output"
+} else {
+  # error catching
+  stop("ERROR no valid username")
+}
+
+# get date & time (e.g.: '20231211_195800')
+currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S")
+
+# change into main dir
+setwd(mainDir)
+
+# get additional data needed for flux calculation
+source("additional_info/gas_id.txt")
+source("additional_info/fixed_values.txt")
+
+# load libraries
+library(dplyr)
+library(tidyverse)
+# optional install (if not already installed) and load dplyr package
+#if(!require(dplyr)){install.packages('dplyr')}
+#if(!require(tidyverse)){install.packages('tidyverse')}
+
+if (calc_fluxes_with_gasfluxes_package == "T"){
+  library(gasfluxes)
+}
+if (calc_fluxes_with_goflux_package == "T"){
+  # TODO  move from functions/calculate_fluxes_with_gofluxyourself_package.R to here
+}
+
+# basic R settings
+options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits
+
+
+# =============================================================================================== #
+# ------------------------------- Data pre-processing -------------------------------------------
+# =============================================================================================== #
+
+source("main_data_preprocessing.R")
+
+#####testing
+measCampList <- c("202307")
+
+# create new empty DF for data and metaData
+ppN2OdataDF           <- data.frame()
+measPeriodMetaDataDF  <- data.frame()
+
+# loop over measCampList
+for (measCamp in measCampList){
+
+  # read raw data from Aeris output file
+  if (measCamp == "202307") process_aeris_raw_data <- "T"
+
+  # extract expYear  ('substr(x, start, stop)')
+  expYear <- substring(measCamp, 1, 4)
+
+  # run function
+  list_hlp <- f_main_data_preprocessing(username,
+                                        process_co2,
+                                        process_ch4,
+                                        process_n2o,
+                                        create_plots_ghg_conc,
+                                        simulateGCdata,
+                                        mainDir,
+                                        dataDir,
+                                        figsGasConcDir,
+                                        outDir,
+                                        currentDateTime,
+                                        measCamp,
+                                        expYear)
+
+  # extract the two DF from list
+  measPeriodMetaDataDF  <- rbind(measPeriodMetaDataDF, as.data.frame(list_hlp[[1]]))
+  ppN2OdataDF           <- rbind(ppN2OdataDF,          as.data.frame(list_hlp[[2]]))
+}
+
+
+
+# =============================================================================================== #
+# ------------------------------- Flux calculation ----------------------------------------------
+# =============================================================================================== #
+
+
+
+
+# =============================================================================================== #
+# ------------------------------- Flux analysis -------------------------------------------------
+# =============================================================================================== #
+
+
+
+
+
diff --git a/preprocessing/run_main_script.sh b/preprocessing/run_main_script.sh
deleted file mode 100755
index 0860f43..0000000
--- a/preprocessing/run_main_script.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/bash
-
-##
-## run the preprocessing main script
-## 'main_script.R'
-##
-
-# experiment years & measurement campaigns
-## 2022
-##  09
-##
-## 2023
-##  05 07 09
-##
-## 2024
-##  tbd
-##
-
-# arguments:
-#   1) expYear  (e.g.: 2023)
-#   2) expMonth (e.g.: 09)
-
-
-# execute the preprocessing for the measurement campaigns the script is working for
-Rscript --vanilla ./main_script.R 2023 05
-Rscript --vanilla ./main_script.R 2023 07
-Rscript --vanilla ./main_script.R 2023 09
-- 
GitLab


From 52a49cb32737d5b81bf288a2e20f99620d889fbb Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Mon, 19 Feb 2024 16:41:59 +0100
Subject: [PATCH 02/24] modify measID to include also measCamp, and run
 pre-processing for all 2023 measCamp

* works for normal & simGC mode
* emits a warning that NAs were introduced during processing - the origin of this warning has not been located yet
---
 .../create_meas_period_metadata_df.R          | 22 ++++++++++++++++---
 preprocessing/main_data_preprocessing.R       |  5 +++--
 preprocessing/run_all_main.R                  | 11 ++++++----
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/preprocessing/functions/create_meas_period_metadata_df.R b/preprocessing/functions/create_meas_period_metadata_df.R
index 5cce89c..d10bdaa 100644
--- a/preprocessing/functions/create_meas_period_metadata_df.R
+++ b/preprocessing/functions/create_meas_period_metadata_df.R
@@ -4,9 +4,9 @@
 ## new data may be added with further steps of preprocessing the data
 ##
 
-create_measPeriodMetaDataDF <- function(expYear, field_mp_meta_data_DF){
+create_measPeriodMetaDataDF <- function(expYear, measCamp, field_mp_meta_data_DF){
   
-  print("start create meta data DF")
+  print(">> start create meta data DF")
 
   # number of measurement periods
   numRows <- nrow(field_mp_meta_data_DF)
@@ -26,6 +26,22 @@ create_measPeriodMetaDataDF <- function(expYear, field_mp_meta_data_DF){
                              "micro_habitat"     = field_mp_meta_data_DF$micro_habitat,         # field-recorded micro habitat
                              "chamber_volume_m3" = rep(NA, numRows))                            # depends on collar heights, can be different for every measurement period
 
+  ## add the measCamp as integer to the measID
+  if (measCamp == "202305"){
+    measCampInt <- 2023050000
+  } else if(measCamp == "202307"){
+    measCampInt <- 2023070000
+  } else if(measCamp == "202309"){
+    measCampInt <- 2023090000
+  } else {
+    # error, unknown measCamp
+    print("")
+    stop("ERROR - unknown measCamp")
+    print("")
+  }
+  mpMetaDataDF$meas_ID <- measCampInt + mpMetaDataDF$meas_ID
+
+
 
 ## 2022 ####
   
@@ -45,7 +61,7 @@ mpMetaDataDF["collar_height_av_cm"] <- collar_height_average
 # (if ... else) 
   # fill into 
 
-  print("end create meta data DF")
+  print(">>  end create meta data DF")
   return(mpMetaDataDF)
 }
 
diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R
index da54b80..77b51d8 100644
--- a/preprocessing/main_data_preprocessing.R
+++ b/preprocessing/main_data_preprocessing.R
@@ -16,6 +16,7 @@ f_main_data_preprocessing <- function(username,
                                       process_co2,
                                       process_ch4,
                                       process_n2o,
+                                      process_aeris_raw_data,
                                       create_plots_ghg_conc,
                                       simulateGCdata,
                                       mainDir,
@@ -27,7 +28,7 @@ f_main_data_preprocessing <- function(username,
                                       expYear){
 
 # print info
-print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp, expYear))
+print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp))
 print("")
 
 # get the gas IDs
@@ -54,7 +55,7 @@ if (expYear == "2022"){
 
 # 2. Create mp meta data DF ----
 source("functions/create_meas_period_metadata_df.R")
-measPeriodMetaDataDF        <- create_measPeriodMetaDataDF(expYear, field_mp_meta_data_DF)
+measPeriodMetaDataDF        <- create_measPeriodMetaDataDF(expYear, measCamp, field_mp_meta_data_DF)
 
 # 3. Sort measurement data per measurement period and gas ----
 
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 3d3901e..fbcc48d 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -93,9 +93,6 @@ options("scipen"=100, "digits"=7) # force R not to use exponential notation for
 
 source("main_data_preprocessing.R")
 
-#####testing
-measCampList <- c("202307")
-
 # create new empty DF for data and metaData
 ppN2OdataDF           <- data.frame()
 measPeriodMetaDataDF  <- data.frame()
@@ -104,7 +101,12 @@ measPeriodMetaDataDF  <- data.frame()
 for (measCamp in measCampList){
 
   # read raw data from Aeris output file
-  if (measCamp == "202307") process_aeris_raw_data <- "T"
+  if (measCamp == "202307"){
+    process_aeris_raw_data <- "T"
+  # set back to FALSE for any other measCamp
+  } else{
+    process_aeris_raw_data <- "F"
+  }
 
   # extract expYear  ('substr(x, start, stop)')
   expYear <- substring(measCamp, 1, 4)
@@ -114,6 +116,7 @@ for (measCamp in measCampList){
                                         process_co2,
                                         process_ch4,
                                         process_n2o,
+                                        process_aeris_raw_data,
                                         create_plots_ghg_conc,
                                         simulateGCdata,
                                         mainDir,
-- 
GitLab


From 87f9bc2ae12eb7ce7593879534e7eae4dcbb45c7 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 20 Feb 2024 13:04:32 +0100
Subject: [PATCH 03/24] modify flux calculation & analysis

* should now run automatically with the DF provided by pre-processing
* removed the file 'combine_all_meas_campaigns.R' and moved its code
  into 'combine_all_data_and_meta_data.R'
* not yet tested
---
 .../calculate_fluxes_with_goflux_package.R    |  38 ++----
 .../combine_all_data_and_meta_data.R          | 118 +++++++++++-------
 .../functions/combine_all_meas_campaigns.R    |  78 ------------
 .../create_input_for_goflux_package.R         |  12 +-
 .../functions/visualise_fluxes_from_goflux.R  |  63 +++++-----
 preprocessing/main_data_preprocessing.R       |   1 +
 preprocessing/main_flux_analysis.R            |  15 ++-
 preprocessing/main_flux_calculation.R         |  21 +++-
 preprocessing/run_all_main.R                  |  26 +++-
 9 files changed, 177 insertions(+), 195 deletions(-)
 delete mode 100644 preprocessing/functions/combine_all_meas_campaigns.R

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index d49ba36..d139bfb 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -1,54 +1,40 @@
 # calc fluxes with GoFlux package
-
-
-# install package remotes
-if (!require("remotes", quietly = TRUE))
-  install.packages("remotes")
-
-# install package GoFlux every time to ensure using the most recent version
-remotes::install_github("Qepanna/goFlux")
-
-# load library
-library(goFlux)
-library(tidyverse)
-
+# 
+#
+# input DF: goflux_input_n2o_df
+#
+# output DF: goflux_best_n2o_flux_df
+#
 
 ## use DF provided by "functions/create_input_for_goflux_package.R"
 ## either read input file or use DF directly
 
-# read input file
-#goflux_input_n2o_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/measCamp_202305goflux_input_n2o_df_20240131.csv",
-#                sep = ";", na = "NA")
-
 # remove all NAs 
 goflux_input_n2o_df <- goflux_input_n2o_df %>%
   drop_na() # remove all rows with a missing value in any column 
+
 # calculate fluxes
 n2o_flux_result_df <- goFlux(goflux_input_n2o_df, "N2Odry_ppb", prec = 0.2)
 
 # choose best flux
-best_n2o_flux_df <-   best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25)
+goflux_best_n2o_flux_df   <- best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25)
 ?best.flux
 
-
 # plot results of best fluxes
-plot_list <- flux.plot(flux.results = best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb",
+plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb",
                        shoulder = 20,
                        plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"),
                        best.model = TRUE)
 
-
-# save output file as pdf - figures
+# save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir')
 setwd(outDir)
 flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2)
+setwd(mainDir)
 
 # save output files - flux calculations
 # write.table(n2o_flux_result,
 #             paste0(outDir, "/measCamp_", measCamp, "goflux_goflux_output.csv"),
 #             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
-write.table(best_n2o_flux_df,
+write.table(goflux_best_n2o_flux_df,
             paste0(outDir, "/measCamp_", measCamp, "goflux_bestflux_output.csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
-setwd(mainDir)
-
-
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 32668df..342bf5e 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -1,17 +1,10 @@
-# aim: combine all data and meta data 
-
-## input files with flux calculation results provided by "functions/calculate_fluxes_with_goflux_package.R"
-# spring 2023
-goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202305/measCamp_202305goflux_bestflux_output.csv"
-# summer 2023
-goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202307/measCamp_202307goflux_bestflux_output.csv"
-# autumn 2023 
-goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202309/measCamp_202309goflux_bestflux_output.csv"
-
-## use either input file:
-goflux_results_df <- read.table(goflux_results_file, sep = ";", header = TRUE, stringsAsFactors = FALSE)
-## or use DF (from "functions/calculate_fluxes_with_goflux_package.R")
-goflux_results_df <- best_n2o_flux_df
+# combine all data and meta data 
+# 
+#
+# input DF: goflux_best_n2o_flux_df
+#
+# output DF: goflux_data_metadata_df
+#
 
 
 # create df with all needed variables: 
@@ -21,9 +14,9 @@ goflux_results_df <- best_n2o_flux_df
 # flux estimates for N2O, CH4, CO2 
 # add start_time to plot the whole year
 
-measurement_periods_total   <- nrow(goflux_results_df) 
+measurement_periods_total   <- nrow(goflux_best_n2o_flux_df) 
 print(paste0("number of mp: ", measurement_periods_total))
-str(goflux_results_df)
+str(goflux_best_n2o_flux_df)
 # run this for all measurement campaigns 
 goflux_data_metadata_df <- data.frame(
   datetime                 = as.POSIXct(rep("1000-01-01 00:00:00", measurement_periods_total), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ),
@@ -48,16 +41,16 @@ goflux_data_metadata_df <- data.frame(
 )
 
 # fill in values from measPeriodMetaDataDF (same amount of rows) and 
-# goflux_results_df with best flux = N2O nmol
+# goflux_best_n2o_flux_df with best flux = N2O nmol
 # remove invalid mps
 measPeriodMetaDataDF <- measPeriodMetaDataDF %>%
   filter(is_mp_valid == 1)
 
 # loop over all mp provided by goflux package output individually
-for (gf_meas_ID in goflux_results_df$UniqueID) {
+for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
   #print(gf_meas_ID)
   # get the rows corresponding with the gf_meas_ID
-  line_Num_gf_results_df <- which(goflux_results_df$UniqueID  == gf_meas_ID)
+  line_Num_gf_results_df <- which(goflux_best_n2o_flux_df$UniqueID  == gf_meas_ID)
   line_Num_metadata_df   <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID)
   
   # choose only n2o mps measured with Aeris gas analyser
@@ -69,16 +62,16 @@ for (gf_meas_ID in goflux_results_df$UniqueID) {
   goflux_data_metadata_df$plot_no[line_Num_gf_results_df]          <- measPeriodMetaDataDF$plot_no[line_Num_metadata_df]
   goflux_data_metadata_df$micro_habitat[line_Num_gf_results_df]    <- measPeriodMetaDataDF$micro_habitat[line_Num_metadata_df]
   goflux_data_metadata_df$light_dark[line_Num_gf_results_df]       <- measPeriodMetaDataDF$light_dark[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df]   <- goflux_results_df$best.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df]       <- goflux_results_df$LM.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df]       <- goflux_results_df$HM.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df]     <- goflux_results_df$best.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df]    <- goflux_results_df$model[line_Num_metadata_df]
+  goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df]   <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df]
+  goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df]       <- goflux_best_n2o_flux_df$LM.flux[line_Num_metadata_df]
+  goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df]       <- goflux_best_n2o_flux_df$HM.flux[line_Num_metadata_df]
+  goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df]     <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df]
+  goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df]    <- goflux_best_n2o_flux_df$model[line_Num_metadata_df]
 }
 
 # fill in values from ppN2OdataDF (differing amount of rows)
 # loop over all mp provided by gas_fluxes pkg output individually
-for (gf_meas_ID in goflux_results_df$UniqueID) {
+for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
   line_Num_metadata_df   <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID)
   
   # choose only n2o mps measured with Aeris gas analyser
@@ -127,20 +120,61 @@ for (gf_meas_ID in goflux_results_df$UniqueID) {
 # }
 
 # write DF goflux_data_metadata_df
-# 202305 May campaign
-write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202305.csv",
-            row.names = FALSE, quote = FALSE, sep = ",")
-
-# 202307 July 2023 campaign 
-write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202307.csv",
-            row.names = FALSE, quote = FALSE, sep = ",")
-# 202309 Sept 2023 campaign 
-write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202309.csv",
-            row.names = FALSE, quote = FALSE, sep = ",")
-
-
-
-
-
-
-
+write.table(goflux_data_metadata_df,
+            paste0(fluxResOutDir, "/all/", currentDateTime, "_goflux_n2o_data_metadata.csv"),
+            row.names = FALSE,
+            quote = FALSE,
+            sep = ",")
+
+# Corrections of the DF ####
+
+# change datetime to POSIXct
+goflux_data_metadata_df$datetime       <- as.POSIXct(goflux_data_metadata_df$datetime, format = "%Y-%m-%d %H:%M:%S", tz="UTC")
+# remove white space from micro habitats
+goflux_data_metadata_df$micro_habitat  <- gsub(" ", "", goflux_data_metadata_df$micro_habitat)
+# remove white space from light_dark
+goflux_data_metadata_df$light_dark     <- gsub(" ", "", goflux_data_metadata_df$light_dark)
+# change dak to dark 
+goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="dak")] <- "dark"
+# change palsa-moss to palsa_moss
+goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
+
+# remove first mp / row 
+# goflux_data_metadata_df <- goflux_data_metadata_df[-1, ]
+
+# remove values below -1000
+goflux_data_metadata_df <- goflux_data_metadata_df %>%
+  filter(n2o_nmolm2sec1 > -500)
+
+NAs <- complete.cases(goflux_data_metadata_df)
+goflux_data_metadata_df <- goflux_data_metadata_df %>%
+  filter(!NAs)
+
+
+# create new DF - one per measCampaign
+# needs 'stringr' library
+#   selection based on 'datetime' column containing "year-month"
+#   and replace 'measCamp' column values with a string of the name of the month
+
+measCampSpring <- goflux_data_metadata_df %>%
+  filter(str_detect(datetime, "2023-05")) %>%
+  mutate(measCamp="May") 
+
+measCampSummer <- goflux_data_metadata_df %>%
+  filter(str_detect(datetime, "2023-07")) %>%
+  mutate(measCamp="July") 
+
+measCampAutumn <- goflux_data_metadata_df %>%
+  filter(str_detect(datetime, "2023-09")) %>%
+  mutate(measCamp="September") 
+
+# combine data frames from all measurement campaigns to one data set
+goflux_data_metadata_df <- measCampSpring
+goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn)
+
+# TODO  write out the DF as file ?
+
+print("")
+print(">>>  finished  <<<")
+print(" the DF goflux_data_metadata_df is ow available in your global environment")
+print("")
\ No newline at end of file
diff --git a/preprocessing/functions/combine_all_meas_campaigns.R b/preprocessing/functions/combine_all_meas_campaigns.R
deleted file mode 100644
index ada822d..0000000
--- a/preprocessing/functions/combine_all_meas_campaigns.R
+++ /dev/null
@@ -1,78 +0,0 @@
-# Read in files ####
-# output files from create_dataframe_for_analysis_from_goflux function
-
-# load libraries
-library(dplyr)
-library(stringr)
-
-
-## input files provided by "functions/combine_all_data_and_meta_data.R"
-# spring 2023
-goflux_metadata_file_2305 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202305.csv"
-# summer 2023
-goflux_metadata_file_2307 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202307.csv"
-# autumn 2023 
-goflux_metadata_file_2309 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202309.csv"
-
-## read input files - one per measCamp
-goflux_metadata_df_2305 <- read.table(goflux_metadata_file_2305, header = TRUE, sep = ",", stringsAsFactors = FALSE)
-# are there NAs in the n2o fluxes within the data frame?
-#goflux_metadata_df_2305[is.na(goflux_metadata_df_2305$n2o_nmolm2sec1), ] # yes
-# drop NA rows
-#goflux_metadata_df_2305_noNA <- goflux_metadata_df_2305[-c(628:667), ]
-#goflux_metadata_df_2305_noNA[is.na(goflux_metadata_df_2305_noNA$n2o_nmolm2sec1), ] # no
-goflux_metadata_df_2307 <- read.table(goflux_metadata_file_2307, header = TRUE, sep = ",", stringsAsFactors = FALSE)
-goflux_metadata_df_2309 <- read.table(goflux_metadata_file_2309, header = TRUE, sep = ",", stringsAsFactors = FALSE)
-
-# combine data frames from all measurement campaigns to one data set
-flux_data_meta_data_all_measCamp <- goflux_metadata_df_2305
-flux_data_meta_data_all_measCamp <- rbind(flux_data_meta_data_all_measCamp, goflux_metadata_df_2307, goflux_metadata_df_2309)
-str(flux_data_meta_data_all_measCamp)
-
-# Corrections of the DF ####
-
-# change datetime to POSIXct
-flux_data_meta_data_all_measCamp$datetime       <- as.POSIXct(flux_data_meta_data_all_measCamp$datetime, format = "%Y-%m-%d %H:%M:%S", tz="UTC")
-# remove white space from micro habitats
-flux_data_meta_data_all_measCamp$micro_habitat  <- gsub(" ", "", flux_data_meta_data_all_measCamp$micro_habitat)
-# remove white space from light_dark
-flux_data_meta_data_all_measCamp$light_dark     <- gsub(" ", "", flux_data_meta_data_all_measCamp$light_dark)
-# change dak to dark 
-flux_data_meta_data_all_measCamp$light_dark[which(flux_data_meta_data_all_measCamp$light_dark=="dak")] <- "dark"
-# change palsa-moss to palsa_moss
-flux_data_meta_data_all_measCamp$micro_habitat[which(flux_data_meta_data_all_measCamp$micro_habitat=="palsa-moss")] <- "palsa_moss"
-
-# remove first mp / row 
-# flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp[-1, ]
-
-# remove values below -1000
-#library(dplyr)
-flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp %>%
-  filter(n2o_nmolm2sec1 > -500)
-
-NAs <- complete.cases(flux_data_meta_data_all_measCamp)
-flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp %>%
-  filter(!NAs)
-
-
-# create new DF - one per measCampaign
-# selection based on 'datetime' column containing "year-month"
-# and replace 'measCamp' column values with a string of the name of the month
-
-#library(stringr)
-measCampSpring <- flux_data_meta_data_all_measCamp %>%
-  filter(str_detect(datetime, "2023-05")) %>%
-  mutate(measCamp="May") 
-
-measCampSummer <- flux_data_meta_data_all_measCamp %>%
-  filter(str_detect(datetime, "2023-07")) %>%
-  mutate(measCamp="July") 
-
-measCampAutumn <- flux_data_meta_data_all_measCamp %>%
-  filter(str_detect(datetime, "2023-09")) %>%
-  mutate(measCamp="September") 
-
-# combine data frames from all measurement campaigns to one data set
-flux_data_meta_data_all_measCamp <- measCampSpring
-flux_data_meta_data_all_measCamp <- rbind(flux_data_meta_data_all_measCamp, measCampSummer, measCampAutumn)
-
diff --git a/preprocessing/functions/create_input_for_goflux_package.R b/preprocessing/functions/create_input_for_goflux_package.R
index c5bf4a3..85e9746 100644
--- a/preprocessing/functions/create_input_for_goflux_package.R
+++ b/preprocessing/functions/create_input_for_goflux_package.R
@@ -1,10 +1,15 @@
 # Preparation of input files for gasfluxes package (same structure as HMR package)
 # main script until "sort data" needs to be run before this script runs 
-# (not embedded in main script yet)
+# 
 #
-# output DF: gasfluxes_input_n2o_df
+# input DF: ppN2OdataDF & measPeriodMetaDataDF
 #
+# output DF: goflux_input_n2o_df
+#
+
 
+# source file with fixed values needed for flux calculation
+source("additional_info/fixed_values.txt")
 
 # N2O
 # first convert ppm to ppb and add as column to df
@@ -26,9 +31,6 @@ empty_cols <- data.frame("plot_no"              = rep(NA, numRows),
 # add 'empty_cols' DF to mPN2oPlotHabitat DF
 mPN2oPlotHabitat <- cbind(mPN2oPlotHabitat, empty_cols)
 
-# source file with fixed values needed for flux calculation
-source("additional_info/fixed_values.txt")
-
 # loop over all mps
 for (iLine in 1:nrow(measPeriodMetaDataDF)){
   # choose only n2o mps measured with Aeris gas analyser
diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R
index 2d7293a..b319355 100644
--- a/preprocessing/functions/visualise_fluxes_from_goflux.R
+++ b/preprocessing/functions/visualise_fluxes_from_goflux.R
@@ -1,46 +1,51 @@
+#
+# manual plotting of flux results from goFlux
+#
+# input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R'
+#
 
 
 # R base plots ---------------------------------------------------------------
 # select rows with onlylight / dark
-rows_light <- which(flux_data_meta_data_all_measCamp$light_dark == "light")
-rows_dark  <- which(flux_data_meta_data_all_measCamp$light_dark == "dark")
+rows_light <- which(goflux_data_metadata_df$light_dark == "light")
+rows_dark  <- which(goflux_data_metadata_df$light_dark == "dark")
 
 ### manually select
-row_select <- 1:nrow(flux_data_meta_data_all_measCamp)  # ALL rows. i.e, undo other selection
+row_select <- 1:nrow(goflux_data_metadata_df)  # ALL rows. i.e, undo other selection
 row_select <- rows_light
 row_select <- rows_dark
 
-plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select]
+plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select]
      )
      #,ylim = c(-0.001,0.0030), col = "green3")
 abline(h=0, col = "green2")
-fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select])
+fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select])
 abline(fit, col="blue2")
 
 # ligth and dark
 row_select <- rows_light
-plot(flux_data_meta_data_all_measCamp$n2o_HMFlux[row_select] ~ 
-       flux_data_meta_data_all_measCamp$datetime[row_select],
+plot(goflux_data_metadata_df$n2o_HMFlux[row_select] ~ 
+       goflux_data_metadata_df$datetime[row_select],
      #)
      ylim = c(-50, 50), col = "orange", cex = 1,
      xlab = "months in 2023",
      ylab = 'N'['2']*'O' * ' flux ('* 'mg' ~N[2]*O-N~ m^-2~h^-1*')')
 row_select <- rows_dark
-points(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select],
+points(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select],
        col = "black", pch = 18, cex = 1,
 )
 abline(h=0, col = "black")
 
 ## soil moist
-plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_moisture_12cm[row_select],
+plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_moisture_12cm[row_select],
      ylim = c(-0.001, 0.0015))
-fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_moisture_12cm[row_select])
+fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_moisture_12cm[row_select])
 abline(fit, col="blue2")
 
 ## soil temp
-plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_temperature_15cm_s1[row_select],
+plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_temperature_15cm_s1[row_select],
      ylim = c(-0.001, 0.0015))
-fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_temperature_15cm_s1[row_select])
+fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_temperature_15cm_s1[row_select])
 abline(fit, col="blue2")
 abline(h=0, col = "green2")
 
@@ -97,7 +102,7 @@ symlog_trans <- function(base = 10, thr = 1, scale = 1){
 
 
 # N2O fluxes over seasons ####
-n2o_fluxes_seasons <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_seasons <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = datetime, y = n2o_nmolm2sec1, )) +
@@ -115,7 +120,7 @@ n2o_fluxes_seasons <- flux_data_meta_data_all_measCamp %>%
 n2o_fluxes_seasons
 
 # L + D -8 ####
-n2o_fluxes_seasons2 <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_seasons2 <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) +
@@ -134,7 +139,7 @@ n2o_fluxes_seasons2
   
 
 # palsa_lichen ####
-n2o_season_palsalichen <- flux_data_meta_data_all_measCamp %>%
+n2o_season_palsalichen <- goflux_data_metadata_df %>%
   filter(micro_habitat == "palsa_lichen") %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) +
@@ -153,7 +158,7 @@ n2o_season_palsalichen <- flux_data_meta_data_all_measCamp %>%
 n2o_season_palsalichen
 
 # palsa moss 8####
-n2o_season_pm8 <- flux_data_meta_data_all_measCamp %>%
+n2o_season_pm8 <- goflux_data_metadata_df %>%
   filter(micro_habitat == "palsa_moss") %>%
   filter(plot_no == 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -173,7 +178,7 @@ n2o_season_pm8 <- flux_data_meta_data_all_measCamp %>%
 n2o_season_pm8
 
 # palsa_moss -8 ####
-n2o_season_palsamoss <- flux_data_meta_data_all_measCamp %>%
+n2o_season_palsamoss <- goflux_data_metadata_df %>%
   filter(micro_habitat == "palsa_moss") %>%
   filter(plot_no != 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -193,7 +198,7 @@ n2o_season_palsamoss <- flux_data_meta_data_all_measCamp %>%
 n2o_season_palsamoss
 
 # N2O fluxes over season: bog plots 
-n2o_season_bog <- flux_data_meta_data_all_measCamp %>%
+n2o_season_bog <- goflux_data_metadata_df %>%
   filter(micro_habitat == "bog") %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) +
@@ -212,7 +217,7 @@ n2o_season_bog <- flux_data_meta_data_all_measCamp %>%
 n2o_season_bog
 
 # N2O fluxes over season: fen plots 
-n2o_season_fen <- flux_data_meta_data_all_measCamp %>%
+n2o_season_fen <- goflux_data_metadata_df %>%
   filter(micro_habitat == "fen") %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) +
@@ -232,7 +237,7 @@ n2o_season_fen
 
 # Boxplots ####
 # L + D ####
-dark_light_season <- flux_data_meta_data_all_measCamp %>%
+dark_light_season <- goflux_data_metadata_df %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   ggplot(aes(x = micro_habitat, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) +
   scale_colour_manual(values = c("black", "orange")) +
@@ -244,7 +249,7 @@ dark_light_season <- flux_data_meta_data_all_measCamp %>%
 dark_light_season
 
 # L + D -8 ####
-dark_light_season <- flux_data_meta_data_all_measCamp %>%
+dark_light_season <- goflux_data_metadata_df %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
   mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>%
   filter(plot_no != 8) %>%
@@ -258,7 +263,7 @@ ggplot(aes(x = micro_habitat, y = n2o_nmolm2sec1, colour = light_dark, shape = l
 dark_light_season
 
 # Spatial variability overall ####
-variability_transects <- flux_data_meta_data_all_measCamp %>%
+variability_transects <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   #filter(light_dark == "dark") %>%
   ggplot(aes(plot_no, n2o_nmolm2sec1, group = plot_no)) + 
@@ -273,7 +278,7 @@ variability_transects <- flux_data_meta_data_all_measCamp %>%
 variability_transects
 
 # Spatial variability per measCamp####
-variability_measCamp <- flux_data_meta_data_all_measCamp %>%
+variability_measCamp <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
   geom_boxplot() +
@@ -289,7 +294,7 @@ variability_measCamp <- flux_data_meta_data_all_measCamp %>%
 variability_measCamp
 
 # L per micro habitat ####
-light <- flux_data_meta_data_all_measCamp %>%
+light <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   filter(light_dark == "light") %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
@@ -304,7 +309,7 @@ light <- flux_data_meta_data_all_measCamp %>%
 light
 
 # D per micro habitat ####
-dark <- flux_data_meta_data_all_measCamp %>%
+dark <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   filter(light_dark == "dark") %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
@@ -320,7 +325,7 @@ dark
 
 # Soil temperature #### 
 # Soil T4 overall ####
-n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat)) +
   geom_point(size = 2) +
@@ -335,7 +340,7 @@ n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>%
 n2o_fluxes_soiltemp4
 
 # Soil T4 per micro habitat####
-n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   ggplot( aes(x = soil_temperature_15cm_s1, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) +
   geom_point(size = 2) +
@@ -353,7 +358,7 @@ n2o_fluxes_soiltemp4
 
 # Soil moisture ####
 # Soil M 12cm ####
-n2o_fluxes_soilmoist_12cm <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_soilmoist_12cm <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat)) +
   geom_point(size = 2) +
@@ -369,7 +374,7 @@ n2o_fluxes_soilmoist_12cm <- flux_data_meta_data_all_measCamp %>%
 n2o_fluxes_soilmoist_12cm
 
 # Soil M 30 cm ####
-n2o_fluxes_soilmoist_30cm <- flux_data_meta_data_all_measCamp %>%
+n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   filter(soil_moisture_30cm < 2000) %>% # remove some weird error values 
   filter(soil_moisture_30cm > 0) %>% # remove 0s
diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R
index 77b51d8..0ec132a 100644
--- a/preprocessing/main_data_preprocessing.R
+++ b/preprocessing/main_data_preprocessing.R
@@ -28,6 +28,7 @@ f_main_data_preprocessing <- function(username,
                                       expYear){
 
 # print info
+print("")
 print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp))
 print("")
 
diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R
index 04da2f0..52a41e7 100644
--- a/preprocessing/main_flux_analysis.R
+++ b/preprocessing/main_flux_analysis.R
@@ -8,18 +8,21 @@
 # ------------------------------- Flux analysis -------------------------------------------------
 # =============================================================================================== #
 
-# 1. Combine all data and meta data----
+# print info
+print("")
+print(">>>> start flux analysis")
+print("")
+
+# 1. Combine all data and meta data ----
 # output: one dataframe that can be used for data analysis and visualisation
-# source("/functions/combine_all_data_and_meta_data.R")
+source("/functions/combine_all_data_and_meta_data.R")
 # to do: probably adjust code so it can run automatically
 # change output: add measCamp and date (line 124-134)
 # output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/
 
-# 2. Combine all measCamps ---- 
-
-# 3. Visualise data ----
+# 2. Visualise data ----
 # does not need to run automatically
-#source("/functions/combine_all_meas_campaigns.R")
+# .... preprocessing/functions/visualise_fluxes_from_goflux.R
 
 
 # TODO ?
diff --git a/preprocessing/main_flux_calculation.R b/preprocessing/main_flux_calculation.R
index 6562668..9b443ad 100644
--- a/preprocessing/main_flux_calculation.R
+++ b/preprocessing/main_flux_calculation.R
@@ -8,19 +8,30 @@
 # ------------------------------- Flux calculation ----------------------------------------------
 # =============================================================================================== #
 
+# print info
+print("")
+print(">>>> start flux calculation")
+print("")
+
 # 1. Create input for goFlux package ----
 source("functions/create_input_for_goflux_package.R")
-# output: goflux_input_n2o_df + csv file 
-# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time))
 
 # 2. Calculate fluxes with goFlux package ----
 if (calc_fluxes_with_goflux_package == "T"){
   print("")
   print(">> prepare DF for and calculate fluxes with the goflux package")
-  
+
+  if (updateGoFluxPkg == "T"){
+    # install / update goFlux package (to ensure using the most recent version)
+    # install package remotes
+    if (!require("remotes", quietly = TRUE)) install.packages("remotes")
+    remotes::install_github("Qepanna/goFlux")
+  }
+
+  # load library
+  library(goFlux)
+
   # calc fluxes
-  # simply source the file and run code (not a function yet)
-  # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir'
   source("functions/calculate_fluxes_with_goflux_package.R")
 }
 
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index fbcc48d..90a7400 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -10,6 +10,8 @@
 # script settings ----
 # =============================================================================================== #
 
+# 1. Script settings ----
+
 # list of measurement campaigns to run [YYYYMM]
 measCampList <- c("202305", "202307", "202309")
 
@@ -32,11 +34,16 @@ create_plots_ghg_conc  <- "T"
 # use the PGA data to simulate GC data, and run only specific filter/correction functions
 simulateGCdata         <- "F"   # [F,T]
 
+# install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R'
+updateGoFluxPkg        <- "F"   # [F,T]
+
 
 # =============================================================================================== #
 # init & get main variables & settings, set directories, change into main dir, load R libraries ----
 # =============================================================================================== #
 
+# 2. Init main variables ----
+
 # set dirs
 # main working directory (version control with gitlab)
 # data dir (files with field data)
@@ -48,12 +55,14 @@ if (username == "ntriches"){
   dataDir         <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data"
   figsGasConcDir  <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing"
   outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
+  fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses"
 } else if (username == "jengel"){
   # Jan
   mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
   dataDir         <- "/home/jengel/nathalie/20231210-field-data"
   figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
   outDir          <- "/home/jengel/nathalie/20231210-script-output"
+  fluxResOutDir   <- paste0(outDir, "/flux-results-output")
 } else {
   # error catching
   stop("ERROR no valid username")
@@ -70,8 +79,10 @@ source("additional_info/gas_id.txt")
 source("additional_info/fixed_values.txt")
 
 # load libraries
+# the goFlux package is loaded (and installed/updated if 'updateGoFluxPkg == "T"') in 'main_flux_calculation.R' when 'calc_fluxes_with_goflux_package == "T"'
 library(dplyr)
 library(tidyverse)
+library(stringr)
 # optional install (if not already installed) and load dplyr package
 #if(!require(dplyr)){install.packages('dplyr')}
 #if(!require(tidyverse)){install.packages('tidyverse')}
@@ -79,9 +90,6 @@ library(tidyverse)
 if (calc_fluxes_with_gasfluxes_package == "T"){
   library(gasfluxes)
 }
-if (calc_fluxes_with_goflux_package == "T"){
-  # TODO  move from functions/calculate_fluxes_with_gofluxyourself_package.R to here
-}
 
 # basic R settings
 options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits
@@ -91,6 +99,8 @@ options("scipen"=100, "digits"=7) # force R not to use exponential notation for
 # ------------------------------- Data pre-processing -------------------------------------------
 # =============================================================================================== #
 
+# 3. Data pre-processing ----
+
 source("main_data_preprocessing.R")
 
 # create new empty DF for data and metaData
@@ -138,14 +148,22 @@ for (measCamp in measCampList){
 # ------------------------------- Flux calculation ----------------------------------------------
 # =============================================================================================== #
 
+# 4. Flux calculation ----
 
+# source and execute the code
+source("main_flux_calculation.R")
 
 
 # =============================================================================================== #
 # ------------------------------- Flux analysis -------------------------------------------------
 # =============================================================================================== #
 
+# 5. Flux analysis ----
 
+# source and execute the code
+source("main_flux_analysis.R")
 
 
-
+#
+# result DF: goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R'
+#
\ No newline at end of file
-- 
GitLab


From 8cee44db35b2f5efe4ff61fea7d3707de0c94ae5 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 20 Feb 2024 14:19:32 +0100
Subject: [PATCH 04/24] modify path settings for directories and remove
 correct_n2o_conc_by_meas_period.R

* correct_n2o_conc_by_meas_period.R is not used
* directories are now created if not existent
* only structural changes, no functional changes
---
 .../additional_info/correction_filter_n2o.txt |  5 ++
 .../additional_info/simulate_gc_n2o_data.txt  |  7 ++-
 .../combine_all_data_and_meta_data.R          |  3 +-
 .../correct_filter_n2o_conc_by_meas_period.R  | 15 +++---
 .../correct_n2o_conc_by_meas_period.R         | 48 -------------------
 preprocessing/functions/plot_ghg_conc_2023.R  |  7 +++
 preprocessing/main_data_preprocessing.R       | 15 +++---
 preprocessing/run_all_main.R                  | 31 ++++++++----
 8 files changed, 57 insertions(+), 74 deletions(-)
 delete mode 100644 preprocessing/functions/correct_n2o_conc_by_meas_period.R

diff --git a/preprocessing/additional_info/correction_filter_n2o.txt b/preprocessing/additional_info/correction_filter_n2o.txt
index daab75a..23b6a37 100644
--- a/preprocessing/additional_info/correction_filter_n2o.txt
+++ b/preprocessing/additional_info/correction_filter_n2o.txt
@@ -7,6 +7,11 @@ if (username == "ntriches"){
   warningDir        <- paste0(outDir, "/filter_correction_warning")
 }
 
+# create directory if it does not exist
+if (!dir.exists(warningDir)){
+  dir.create(warningDir)
+}
+
 # N2O data parameter for correction/filter functions
 ## function 01
 minLengthMPSec <- 180  # [seconds]
diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt
index 3d1eb18..80c4dc9 100644
--- a/preprocessing/additional_info/simulate_gc_n2o_data.txt
+++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt
@@ -5,7 +5,12 @@ if (username == "ntriches"){
   simGCsampleDatainfoDir          <- paste0(outDir, "/sim_gc_sample_data_info")
 }
 
-# list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods'
+# create directory if it does not exist
+if (!dir.exists(simGCsampleDatainfoDir)){
+  dir.create(simGCsampleDatainfoDir)
+}
+
+# list of filter/correction functions to run with 'f_correct_filter_n2o_conc_by_meas_period'
 simGCfilterCorrFunctions          <- c(1, 2, 3, 4, 5, 8, 9, 10, 11, 12)
 
 # min length of the mp to be used for "GC simulation"
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 342bf5e..64003fa 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -120,8 +120,9 @@ for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
 # }
 
 # write DF goflux_data_metadata_df
+# create directory if it does not exist
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/all/", currentDateTime, "_goflux_n2o_data_metadata.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata.csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
index d2ea39e..1d1a2b3 100644
--- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
+++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R
@@ -1,12 +1,13 @@
 # main filter script for correcting GHG concentrations before flux calculation
 
-f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF_mID,
-                                                            measPeriodN2o_mID,
-                                                            measID,
-                                                            mainDir,
-                                                            logfile_n2o_DataCorrection,
-                                                            username,
-                                                            simulateGCdata){
+f_correct_filter_n2o_conc_by_meas_period <- function(measPeriodMetaDataDF_mID,
+                                                     measPeriodN2o_mID,
+                                                     measID,
+                                                     mainDir,
+                                                     outDir,
+                                                     logfile_n2o_DataCorrection,
+                                                     username,
+                                                     simulateGCdata){
 
   print(paste0(">> start N2O data filter/correction for mp: ", measID))
   if (simulateGCdata == "T"){
diff --git a/preprocessing/functions/correct_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_n2o_conc_by_meas_period.R
deleted file mode 100644
index 7f455b8..0000000
--- a/preprocessing/functions/correct_n2o_conc_by_meas_period.R
+++ /dev/null
@@ -1,48 +0,0 @@
-# USE: correct measurement periods (mps) to obtain to most stable linear fit 
-# AIM: reduce noise from closing and opening chamber (hood) in the field
-
-correct_n2o_conc_sorted_by_mp  <- function(measPeriodMetaDataDF_loc, measPeriodN2o_loc){
-  
-  print("start correcting n2o conc measurement periods")
-  
-  # create empty (new) data frames for collection of all mps
-  allN2oMeasPeriodDF <- data.frame()
-  modMeasPeriodn2o <- data.frame()
-  
-  # time difference in the start ("front") of the mp
-  diffFront <- 20 # 10 rows (measurements)
-  # time difference in the start ("back") of the mp
-  diffBack <- 10 # 10 rows (measurements)
-  
-  # loop over all mps
-  for (iLine in 1:nrow(measPeriodMetaDataDF)){
-    # choose only n2o mps measured with Aeris gas analyser
-    if(measPeriodMetaDataDF_loc$is_n2o_aeris[iLine] == 1){ 
-      
-      # choose the correct line for the loop
-      meas_ID <- measPeriodMetaDataDF_loc$meas_ID[iLine]
-      # get the right amount of rows from the original df
-      rows <- which(measPeriodN2o_loc$meas_ID == meas_ID)
-      # find the first row = find the start of the individual mp
-      first_row <- min(rows)
-      # find the last row = find the end of the individual mp
-      last_row <- max(rows)
-      # add the new first row time step = add seconds to start time -> take seconds away
-      new_first_row <- first_row + diffFront
-      # subtract the new last row time step = subtract from end time -> take seconds away
-      new_last_row <- last_row - diffBack
-      # the result is a shorter mp 
-      modMeasPeriodN2o <- measPeriodN2o_loc[new_first_row:new_last_row, ] 
-      
-      # add mp data to DF with all mp
-      allN2oMeasPeriodDF           <- rbind(allN2oMeasPeriodDF, modMeasPeriodN2o)
-      
-    }
-  }  
-  
-  print("end correcting n2o-conc measurement periods")
-  return(allN2oMeasPeriodDF)
-}
-
-
-
diff --git a/preprocessing/functions/plot_ghg_conc_2023.R b/preprocessing/functions/plot_ghg_conc_2023.R
index 73d764c..7c819a9 100644
--- a/preprocessing/functions/plot_ghg_conc_2023.R
+++ b/preprocessing/functions/plot_ghg_conc_2023.R
@@ -242,8 +242,15 @@ f_plot_ghg_conc_2023_n2o_non_mod_mp <- function(process_n2o,
   print(">> plot GHG concentrations N2O")
 
   if (process_n2o == "T"){
+    # create the output directory for this expYear if it does not exist
+    if (!dir.exists(paste0(figsGasConcDir, "/", expYear))){
+      dir.create(paste0(figsGasConcDir, "/", expYear))
+    }
+
+    # get vector of measurement IDs
     meas_ID <- measPeriodN2o$meas_ID
 
+    # loop over measurement IDs
     for (i in unique(meas_ID)){
       fileID <- i
       d <- subset(measPeriodN2o, meas_ID == i)
diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R
index 0ec132a..1142a75 100644
--- a/preprocessing/main_data_preprocessing.R
+++ b/preprocessing/main_data_preprocessing.R
@@ -160,13 +160,14 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
     measID      <- measPeriodMetaDataDF$meas_ID[row]
     rowsN2OData <- which(measPeriodN2o$meas_ID == measID)
     # returns a list of vector and data.frame
-    list_hlp    <- f_correct_n2o_conc_sorted_by_measurementperiods(measPeriodMetaDataDF[row,],
-                                                                   measPeriodN2o[rowsN2OData,],
-                                                                   measID,
-                                                                   mainDir,
-                                                                   logfile_n2o_DataCorrection,
-                                                                   username,
-                                                                   simulateGCdata)
+    list_hlp    <- f_correct_filter_n2o_conc_by_meas_period(measPeriodMetaDataDF[row,],
+                                                            measPeriodN2o[rowsN2OData,],
+                                                            measID,
+                                                            mainDir,
+                                                            outDir,
+                                                            logfile_n2o_DataCorrection,
+                                                            username,
+                                                            simulateGCdata)
     # extract vector and data.frame from list
     measPeriodMetaDataDF[row,]  <- as.data.frame(list_hlp[[1]])
     measPeriodN2oCorr           <- rbind(measPeriodN2oCorr, as.data.frame(list_hlp[[2]]))
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 90a7400..c75c978 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -45,29 +45,40 @@ updateGoFluxPkg        <- "F"   # [F,T]
 # 2. Init main variables ----
 
 # set dirs
-# main working directory (version control with gitlab)
-# data dir (files with field data)
+# main working directory - the dir 'preprocessing/' in the git repository with these scripts
+# data dir               - files with field data
+# outDir                 - main output directory 
 # differentiate between users
 username <- Sys.getenv("USER")
 if (username == "ntriches"){
-  # Nathalie
   mainDir         <- "/home/ntriches/git_repo/data-analysis/preprocessing"
   dataDir         <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data"
   figsGasConcDir  <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing"
   outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
-  fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses"
+  fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all"
 } else if (username == "jengel"){
-  # Jan
-  mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
-  dataDir         <- "/home/jengel/nathalie/20231210-field-data"
-  figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
-  outDir          <- "/home/jengel/nathalie/20231210-script-output"
-  fluxResOutDir   <- paste0(outDir, "/flux-results-output")
+  # ## local
+  # mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
+  # dataDir         <- "/home/jengel/nathalie/20231210-field-data"
+  # figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
+  # outDir          <- "/home/jengel/nathalie/20231210-script-output"
+  ## BGC HPC
+  mainDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing"
+  dataDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data"
+  figsGasConcDir  <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc"
+  outDir          <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output"
+  ## all
+  fluxResOutDir   <- paste0(outDir, "/flux_results_output")
 } else {
   # error catching
   stop("ERROR no valid username")
 }
 
+# create output dirs if not existent
+if (!dir.exists(figsGasConcDir)) dir.create(figsGasConcDir)
+if (!dir.exists(outDir))         dir.create(outDir)
+if (!dir.exists(fluxResOutDir))  dir.create(fluxResOutDir)
+
 # get date & time (e.g.: '20231211_195800')
 currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S")
 
-- 
GitLab


From 9348836f83a66fec2ae75892c3cff50c29f99942 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 20 Feb 2024 15:23:30 +0100
Subject: [PATCH 05/24] minor changes and corrections

---
 .../calculate_fluxes_with_goflux_package.R    |  2 +-
 .../combine_all_data_and_meta_data.R          |  5 ++++
 preprocessing/main_flux_analysis.R            |  2 +-
 preprocessing/run_all_main.R                  | 28 +++++++++++--------
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index d139bfb..4113d85 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -17,8 +17,8 @@ goflux_input_n2o_df <- goflux_input_n2o_df %>%
 n2o_flux_result_df <- goFlux(goflux_input_n2o_df, "N2Odry_ppb", prec = 0.2)
 
 # choose best flux
+#   docu: ?best.flux()
 goflux_best_n2o_flux_df   <- best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25)
-?best.flux
 
 # plot results of best fluxes
 plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb",
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 64003fa..e5f497b 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -5,6 +5,11 @@
 #
 # output DF: goflux_data_metadata_df
 #
+#  colnames(goflux_data_metadata_df)
+#  [1] "datetime"                 "meas_ID"                  "plot_no"                  "micro_habitat"            "par"                      "light_dark"              
+#  [7] "soil_moisture_12cm"       "soil_moisture_30cm"       "soil_temperature_15cm_s1" "soil_temperature_15cm_s2" "soil_temperature_15cm_s3" "soil_temperature_15cm_s4"
+# [13] "ch4_µmolm2sec1"           "co2_µmolm2sec1"           "n2o_nmolm2sec1"           "n2o_LMFlux"               "n2o_HMFlux"               "n2o_bestFlux"            
+# [19] "n2o_bestModel"            "measCamp"                
 
 
 # create df with all needed variables: 
diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R
index 52a41e7..15e725a 100644
--- a/preprocessing/main_flux_analysis.R
+++ b/preprocessing/main_flux_analysis.R
@@ -15,7 +15,7 @@ print("")
 
 # 1. Combine all data and meta data ----
 # output: one dataframe that can be used for data analysis and visualisation
-source("/functions/combine_all_data_and_meta_data.R")
+source("functions/combine_all_data_and_meta_data.R")
 # to do: probably adjust code so it can run automatically
 # change output: add measCamp and date (line 124-134)
 # output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index c75c978..3bf04ac 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -35,7 +35,7 @@ create_plots_ghg_conc  <- "T"
 simulateGCdata         <- "F"   # [F,T]
 
 # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R'
-updateGoFluxPkg        <- "F"   # [F,T]
+updateGoFluxPkg        <- "T"   # [F,T]
 
 
 # =============================================================================================== #
@@ -57,18 +57,24 @@ if (username == "ntriches"){
   outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
   fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all"
 } else if (username == "jengel"){
-  # ## local
-  # mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
-  # dataDir         <- "/home/jengel/nathalie/20231210-field-data"
-  # figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
-  # outDir          <- "/home/jengel/nathalie/20231210-script-output"
-  ## BGC HPC
-  mainDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing"
-  dataDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data"
-  figsGasConcDir  <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc"
-  outDir          <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output"
+  jePlace <- "bgchpc"  # [local,bgchpc]
+  if (jePlace == "local"){
+    mainDir         <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing"
+    dataDir         <- "/home/jengel/nathalie/20231210-field-data"
+    figsGasConcDir  <- "/home/jengel/nathalie/20231210-figs_gas_conc"
+    outDir          <- "/home/jengel/nathalie/20231210-script-output"
+  } else if (jePlace == "bgchpc"){
+    mainDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing"
+    dataDir         <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data"
+    figsGasConcDir  <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc"
+    outDir          <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output"
+    # using Rstudio via ssh needs:
+    .libPaths(c("/Net/Groups/BSI/work_scratch/quincy/model/software/r_packages/r_4.3.x", "/Net/Groups/BSI/work_scratch/jengel/software/r_packages/r_4.3.x", "/opt/ohpc/pub/libs/gnu12/R/4.3.2/lib64/R/library"))
+  }
   ## all
   fluxResOutDir   <- paste0(outDir, "/flux_results_output")
+  ## overwrite default setting
+  updateGoFluxPkg <- "F"
 } else {
   # error catching
   stop("ERROR no valid username")
-- 
GitLab


From 424a5e40cda8a651570b5b02fd281d8013f288b5 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Tue, 20 Feb 2024 16:14:00 +0100
Subject: [PATCH 06/24] bugfix: flux.plot() displays at most 5 parameters
 in 'plot.legend'

---
 preprocessing/functions/calculate_fluxes_with_goflux_package.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index 4113d85..b6953c6 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -23,7 +23,8 @@ goflux_best_n2o_flux_df   <- best.flux(flux.result = n2o_flux_result_df, g.limit
 # plot results of best fluxes
 plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb",
                        shoulder = 20,
-                       plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"),
+                       plot.legend = c("RMSE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot
+                       #plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"),
                        best.model = TRUE)
 
 # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir')
-- 
GitLab


From 59523206fd0722a03d06481dd46c72669124bf24 Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Tue, 20 Feb 2024 21:00:01 +0200
Subject: [PATCH 07/24] change output dir ntriches

* one subfolder was missing so output couldn't be saved
* currently, the output produces the same error I've been fighting with before
* next step: fix that error, then try GC simulation
---
 preprocessing/run_all_main.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 3bf04ac..6a17334 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -55,7 +55,7 @@ if (username == "ntriches"){
   dataDir         <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data"
   figsGasConcDir  <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing"
   outDir          <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output"
-  fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all"
+  fluxResOutDir   <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all"
 } else if (username == "jengel"){
   jePlace <- "bgchpc"  # [local,bgchpc]
   if (jePlace == "local"){
-- 
GitLab


From 9c2d7b5cb3d40ae4482dd3028c4e7366611c002c Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 12:45:59 +0200
Subject: [PATCH 08/24] unsuccessful trial to find output error

* uncommented a few lines for trial, unsuccessful
* unsure where the mistake is and where to look for it
* output error: first row of measCamp 202307 (or 202309) overwrites the flux
estimate from the last row of the previous measCamp 202305 (or 202307)
* error is only in data from goflux output, not in data from measCampMetaData.
---
 .../functions/combine_all_data_and_meta_data.R         | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index e5f497b..20d060e 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -146,12 +146,12 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da
 goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
 
 # remove first mp / row 
-# goflux_data_metadata_df <- goflux_data_metadata_df[-1, ]
+goflux_data_metadata_df <- goflux_data_metadata_df[-1, ]
 
 # remove values below -1000
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
   filter(n2o_nmolm2sec1 > -500)
-
+# 
 NAs <- complete.cases(goflux_data_metadata_df)
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
   filter(!NAs)
@@ -164,15 +164,15 @@ goflux_data_metadata_df <- goflux_data_metadata_df %>%
 
 measCampSpring <- goflux_data_metadata_df %>%
   filter(str_detect(datetime, "2023-05")) %>%
-  mutate(measCamp="May") 
+  mutate(measCamp="May")
 
 measCampSummer <- goflux_data_metadata_df %>%
   filter(str_detect(datetime, "2023-07")) %>%
-  mutate(measCamp="July") 
+  mutate(measCamp="July")
 
 measCampAutumn <- goflux_data_metadata_df %>%
   filter(str_detect(datetime, "2023-09")) %>%
-  mutate(measCamp="September") 
+  mutate(measCamp="September")
 
 # combine data frames from all measurement campaigns to one data set
 goflux_data_metadata_df <- measCampSpring
-- 
GitLab


From ae1ac598ce3dcdf68ea2cace817cd3f67e2d3cf6 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 14:57:39 +0100
Subject: [PATCH 09/24] create more output files from flux calculations and
 write all of it to the 'fluxResOutDir'

* rename the output files from flux calc, now incl. all measCamp
---
 .../calculate_fluxes_with_goflux_package.R        | 12 ++++++------
 .../functions/combine_all_data_and_meta_data.R    | 15 ++++++++-------
 preprocessing/run_all_main.R                      |  6 ++++++
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index b6953c6..6321d94 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -28,14 +28,14 @@ plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflu
                        best.model = TRUE)
 
 # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir')
-setwd(outDir)
+setwd(fluxResOutDir)
 flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2)
 setwd(mainDir)
 
-# save output files - flux calculations
-# write.table(n2o_flux_result,
-#             paste0(outDir, "/measCamp_", measCamp, "goflux_goflux_output.csv"),
-#             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
+# save output file from flux calculations
+write.table(n2o_flux_result,
+            paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"),
+            row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
 write.table(goflux_best_n2o_flux_df,
-            paste0(outDir, "/measCamp_", measCamp, "goflux_bestflux_output.csv"),
+            paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_bestflux_n2o_output.csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 20d060e..5dbf86d 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -125,9 +125,8 @@ for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
 # }
 
 # write DF goflux_data_metadata_df
-# create directory if it does not exist
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata.csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
@@ -145,13 +144,10 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da
 # change palsa-moss to palsa_moss
 goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
 
-# remove first mp / row 
-goflux_data_metadata_df <- goflux_data_metadata_df[-1, ]
-
 # remove values below -1000
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
   filter(n2o_nmolm2sec1 > -500)
-# 
+# remove any line that contains NA
 NAs <- complete.cases(goflux_data_metadata_df)
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
   filter(!NAs)
@@ -178,7 +174,12 @@ measCampAutumn <- goflux_data_metadata_df %>%
 goflux_data_metadata_df <- measCampSpring
 goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn)
 
-# TODO  write out the DF as file ?
+# write corrected DF goflux_data_metadata_df
+write.table(goflux_data_metadata_df,
+            paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"),
+            row.names = FALSE,
+            quote = FALSE,
+            sep = ",")
 
 print("")
 print(">>>  finished  <<<")
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 6a17334..ae47d21 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -44,6 +44,12 @@ updateGoFluxPkg        <- "T"   # [F,T]
 
 # 2. Init main variables ----
 
+# create string with all measCamp
+measCampListString <- character()
+for (mc in measCampList){
+  measCampListString <- paste0(measCampListString, mc, "_")
+}
+
 # set dirs
 # main working directory - the dir 'preprocessing/' in the git repository with these scripts
 # data dir               - files with field data
-- 
GitLab


From f57611fdc7be99ffba72a8e3bd33e1d7100c50ac Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 15:05:57 +0100
Subject: [PATCH 10/24] bugfix DF name n2o_flux_result_df in write() function

---
 preprocessing/functions/calculate_fluxes_with_goflux_package.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index 6321d94..0d10072 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -33,7 +33,7 @@ flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2)
 setwd(mainDir)
 
 # save output file from flux calculations
-write.table(n2o_flux_result,
+write.table(n2o_flux_result_df,
             paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
 write.table(goflux_best_n2o_flux_df,
-- 
GitLab


From 6827de02ad05094942846237a4f0094cd15c45e8 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 16:10:15 +0100
Subject: [PATCH 11/24] re-write and error-correct data-filling of
 goflux_data_metadata_df in combine_all_data_and_meta_data.R

* not yet tested, but should work
---
 .../combine_all_data_and_meta_data.R          | 184 ++++++++----------
 1 file changed, 77 insertions(+), 107 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 5dbf86d..ea06821 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -1,7 +1,8 @@
-# combine all data and meta data 
+#
+# combine all data and meta data needed for analysis into one DF
 # 
 #
-# input DF: goflux_best_n2o_flux_df
+# input DF: goflux_best_n2o_flux_df & measPeriodMetaDataDF
 #
 # output DF: goflux_data_metadata_df
 #
@@ -12,117 +13,86 @@
 # [19] "n2o_bestModel"            "measCamp"                
 
 
-# create df with all needed variables: 
-# measurement ID, plot number, micro habitat,
-# PAR, light or dark measurement (L / D), 
-# soil moisture (VWC12 and VWC30), soil temperature (1-4)
-# flux estimates for N2O, CH4, CO2 
-# add start_time to plot the whole year
+# number of measurement periods (mp)
+goflux_total_mp <- nrow(goflux_best_n2o_flux_df) 
+print(paste0("  number of mp: ", goflux_total_mp))
 
-measurement_periods_total   <- nrow(goflux_best_n2o_flux_df) 
-print(paste0("number of mp: ", measurement_periods_total))
+# output summary of DF to screen
 str(goflux_best_n2o_flux_df)
-# run this for all measurement campaigns 
+print("")
+
+# create new DF with selected output variables
 goflux_data_metadata_df <- data.frame(
-  datetime                 = as.POSIXct(rep("1000-01-01 00:00:00", measurement_periods_total), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ),
-  meas_ID                  = rep(NA, measurement_periods_total),
-  plot_no                  = rep(NA, measurement_periods_total),
-  micro_habitat            = rep(NA, measurement_periods_total),
-  par                      = rep(NA, measurement_periods_total),
-  light_dark               = rep(NA, measurement_periods_total),
-  soil_moisture_12cm       = rep(NA, measurement_periods_total),
-  soil_moisture_30cm       = rep(NA, measurement_periods_total),
-  soil_temperature_15cm_s1 = rep(NA, measurement_periods_total),
-  soil_temperature_15cm_s2 = rep(NA, measurement_periods_total),
-  soil_temperature_15cm_s3 = rep(NA, measurement_periods_total),
-  soil_temperature_15cm_s4 = rep(NA, measurement_periods_total),
-  ch4_µmolm2sec1           = rep(NA, measurement_periods_total),
-  co2_µmolm2sec1           = rep(NA, measurement_periods_total),
-  n2o_nmolm2sec1           = rep(NA, measurement_periods_total),
-  n2o_LMFlux               = rep(NA, measurement_periods_total),
-  n2o_HMFlux               = rep(NA, measurement_periods_total),
-  n2o_bestFlux             = rep(NA, measurement_periods_total),
-  n2o_bestModel            = rep(NA, measurement_periods_total)
+  datetime                 = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ),
+  meas_ID                  = rep(NA, goflux_total_mp),
+  plot_no                  = rep(NA, goflux_total_mp),
+  micro_habitat            = rep(NA, goflux_total_mp),
+  par                      = rep(NA, goflux_total_mp),
+  light_dark               = rep(NA, goflux_total_mp),
+  soil_moisture_12cm       = rep(NA, goflux_total_mp),
+  soil_moisture_30cm       = rep(NA, goflux_total_mp),
+  soil_temperature_15cm_s1 = rep(NA, goflux_total_mp),
+  soil_temperature_15cm_s2 = rep(NA, goflux_total_mp),
+  soil_temperature_15cm_s3 = rep(NA, goflux_total_mp),
+  soil_temperature_15cm_s4 = rep(NA, goflux_total_mp),
+  ch4_µmolm2sec1           = rep(NA, goflux_total_mp),
+  co2_µmolm2sec1           = rep(NA, goflux_total_mp),
+  n2o_nmolm2sec1           = rep(NA, goflux_total_mp),
+  n2o_LMFlux               = rep(NA, goflux_total_mp),
+  n2o_HMFlux               = rep(NA, goflux_total_mp),
+  n2o_bestFlux             = rep(NA, goflux_total_mp),
+  n2o_bestModel            = rep(NA, goflux_total_mp)
 )
 
-# fill in values from measPeriodMetaDataDF (same amount of rows) and 
-# goflux_best_n2o_flux_df with best flux = N2O nmol
-# remove invalid mps
-measPeriodMetaDataDF <- measPeriodMetaDataDF %>%
-  filter(is_mp_valid == 1)
-
-# loop over all mp provided by goflux package output individually
-for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
-  #print(gf_meas_ID)
-  # get the rows corresponding with the gf_meas_ID
-  line_Num_gf_results_df <- which(goflux_best_n2o_flux_df$UniqueID  == gf_meas_ID)
-  line_Num_metadata_df   <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID)
-  
-  # choose only n2o mps measured with Aeris gas analyser
- # if(measPeriodMetaDataDF$is_n2o_aeris[line_Num_metadata_df] == 1){
-  
-  goflux_data_metadata_df$datetime[line_Num_gf_results_df]         <- measPeriodMetaDataDF$datetime_mp_first[line_Num_metadata_df] # datetime from measurement start 
-  #goflux_data_metadata_df$datetime[line_Num_gf_results_df] <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[line_Num_metadata_df], format = "%Y-%m-%d %H:%M:%S", tz="UTC")
-  goflux_data_metadata_df$meas_ID[line_Num_gf_results_df]          <- measPeriodMetaDataDF$meas_ID[line_Num_metadata_df]
-  goflux_data_metadata_df$plot_no[line_Num_gf_results_df]          <- measPeriodMetaDataDF$plot_no[line_Num_metadata_df]
-  goflux_data_metadata_df$micro_habitat[line_Num_gf_results_df]    <- measPeriodMetaDataDF$micro_habitat[line_Num_metadata_df]
-  goflux_data_metadata_df$light_dark[line_Num_gf_results_df]       <- measPeriodMetaDataDF$light_dark[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df]   <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df]       <- goflux_best_n2o_flux_df$LM.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df]       <- goflux_best_n2o_flux_df$HM.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df]     <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df]
-  goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df]    <- goflux_best_n2o_flux_df$model[line_Num_metadata_df]
-}
+# # remove invalid mp from metDataDF
+# measPeriodMetaDataDF <- measPeriodMetaDataDF %>%
+#   filter(is_mp_valid == 1)
 
-# fill in values from ppN2OdataDF (differing amount of rows)
-# loop over all mp provided by gas_fluxes pkg output individually
-for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) {
-  line_Num_metadata_df   <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID)
+# fill the 'goflux_data_metadata_df' with data from three different DF
+#   goflux_best_n2o_flux_df  << goFlux
+#   measPeriodMetaDataDF     << metaData
+#   ppN2OdataDF              << environmental data
+#
+# loop over all mp provided by goflux-package output
+for (goflux_row in 1:goflux_total_mp) {
+  # get and output UniqueID
+  gf_uID <- goflux_best_n2o_flux_df$UniqueID[goflux_row]
+  #print(gf_uID)
+
+  # get the rows of goFlux output and metDataDF corresponding with the gf_uID
+  rowNum_goFluxDF    <- goflux_row
+  rowNum_metaDataDF  <- which(measPeriodMetaDataDF$meas_ID     == gf_uID)
+  rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID              == gf_uID)
+
+  # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF
+  if (! length(rowNum_metaDataDF) == 1)  stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
+  if (! length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
   
-  # choose only n2o mps measured with Aeris gas analyser
-  if(measPeriodMetaDataDF$is_n2o_aeris[line_Num_metadata_df] == 1){
-    
-    # get the rows (with all measurements) corresponding with the gf_meas_ID
-    row_target <- which(goflux_data_metadata_df$meas_ID == gf_meas_ID)
-    # docu
-    rows_source <- which(ppN2OdataDF$meas_ID == gf_meas_ID)
-    
-    # for all these below: we need the average for the whole measPeriod 
-    # ideally just some time in the middle of the measurement (think about it)
-    goflux_data_metadata_df$par[row_target]                       <- median(ppN2OdataDF$par[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_moisture_12cm[row_target]        <- median(ppN2OdataDF$vwc12[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_moisture_30cm[row_target]        <- median(ppN2OdataDF$vwc30[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_temperature_15cm_s1[row_target]  <- median(ppN2OdataDF$soilT1degC[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_temperature_15cm_s2[row_target]  <- median(ppN2OdataDF$soilT2degC[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_temperature_15cm_s3[row_target]  <- median(ppN2OdataDF$soilT3degC[rows_source], na.rm = FALSE)
-    goflux_data_metadata_df$soil_temperature_15cm_s4[row_target]  <- median(ppN2OdataDF$soilT4degC[rows_source], na.rm = FALSE)    
-  }
-}
+  # goflux_best_n2o_flux_df << goFlux
+  goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row]   <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_LMFlux[goflux_row]       <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_HMFlux[goflux_row]       <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_bestFlux[goflux_row]     <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_bestModel[goflux_row]    <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF]
+
+  # measPeriodMetaDataDF << metaData
+  goflux_data_metadata_df$datetime[goflux_row]         <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start 
+  #goflux_data_metadata_df$datetime[goflux_row]         <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF], format = "%Y-%m-%d %H:%M:%S", tz="UTC")
+  goflux_data_metadata_df$meas_ID[goflux_row]          <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF]
+  goflux_data_metadata_df$plot_no[goflux_row]          <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF]
+  goflux_data_metadata_df$micro_habitat[goflux_row]    <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF]
+  goflux_data_metadata_df$light_dark[goflux_row]       <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF]
+
+  # ppN2OdataDF << environmental data
+  goflux_data_metadata_df$par[goflux_row]                       <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_moisture_12cm[goflux_row]        <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_moisture_30cm[goflux_row]        <- median(ppN2OdataDF$vwc30[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_temperature_15cm_s1[goflux_row]  <- median(ppN2OdataDF$soilT1degC[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_temperature_15cm_s2[goflux_row]  <- median(ppN2OdataDF$soilT2degC[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_temperature_15cm_s3[goflux_row]  <- median(ppN2OdataDF$soilT3degC[rowNum_ppN2OdataDF], na.rm = FALSE)
+  goflux_data_metadata_df$soil_temperature_15cm_s4[goflux_row]  <- median(ppN2OdataDF$soilT4degC[rowNum_ppN2OdataDF], na.rm = FALSE)    
 
-# check if it works for all measCamp 
-# # loop over all mps
-# for (iLine in 1:nrow(measPeriodMetaDataDF)){
-#   # choose only n2o mps measured with Aeris gas analyser
-#   if(measPeriodMetaDataDF$is_n2o_aeris[iLine] == 1){
-#     
-#     # get the meas_ID, ID or the mp, of this line
-#     meas_ID <- measPeriodMetaDataDF$meas_ID[iLine]
-#     # get the rows (with all measurements) corresponding with the meas_ID
-#     row_target <- which(goflux_data_metadata_df$meas_ID == meas_ID)
-#     # docu
-#     rows_source <- which(measPeriodN2o$meas_ID == meas_ID)
-#     
-#     # for all these below: we need the average for the whole measPeriod 
-#     # ideally just some time in the middle of the measurement (think about it)
-#     goflux_data_metadata_df$par[row_target]                       <- median(measPeriodN2o$par[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_moisture_12cm[row_target]        <- median(measPeriodN2o$vwc12[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_moisture_30cm[row_target]        <- median(measPeriodN2o$vwc30[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_temperature_15cm_s1[row_target]  <- median(measPeriodN2o$soilT1degC[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_temperature_15cm_s2[row_target]  <- median(measPeriodN2o$soilT2degC[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_temperature_15cm_s3[row_target]  <- median(measPeriodN2o$soilT3degC[rows_source], na.rm = FALSE)
-#     goflux_data_metadata_df$soil_temperature_15cm_s4[row_target]  <- median(measPeriodN2o$soilT4degC[rows_source], na.rm = FALSE)
-#   }
-# }
+}
 
 # write DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
@@ -148,9 +118,9 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
   filter(n2o_nmolm2sec1 > -500)
 # remove any line that contains NA
-NAs <- complete.cases(goflux_data_metadata_df)
+rowContainsNoNA <- complete.cases(goflux_data_metadata_df)
 goflux_data_metadata_df <- goflux_data_metadata_df %>%
-  filter(!NAs)
+  filter(rowContainsNoNA)
 
 
 # create new DF - one per measCampaign
-- 
GitLab


From 8af9c3ec308684ef96a2d1d0692c8e0369a67b12 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 16:38:32 +0100
Subject: [PATCH 12/24] minor additional changes in creating the results DF

---
 .../combine_all_data_and_meta_data.R          | 34 ++++++++++---------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index ea06821..2b7dd10 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -23,25 +23,28 @@ print("")
 
 # create new DF with selected output variables
 goflux_data_metadata_df <- data.frame(
+  # metaData
   datetime                 = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ),
   meas_ID                  = rep(NA, goflux_total_mp),
   plot_no                  = rep(NA, goflux_total_mp),
   micro_habitat            = rep(NA, goflux_total_mp),
-  par                      = rep(NA, goflux_total_mp),
   light_dark               = rep(NA, goflux_total_mp),
+  # goFlux
+  #ch4_mumolm2sec1          = rep(NA, goflux_total_mp),  # TODO
+  #co2_mumolm2sec1          = rep(NA, goflux_total_mp),  # TODO
+  n2o_nmolm2sec1           = rep(NA, goflux_total_mp),
+  n2o_LMFlux               = rep(NA, goflux_total_mp),
+  n2o_HMFlux               = rep(NA, goflux_total_mp),
+  n2o_bestFlux             = rep(NA, goflux_total_mp),
+  n2o_bestModel            = rep(NA, goflux_total_mp)
+  # environmental data
+  par                      = rep(NA, goflux_total_mp),
   soil_moisture_12cm       = rep(NA, goflux_total_mp),
   soil_moisture_30cm       = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s1 = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s2 = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s3 = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s4 = rep(NA, goflux_total_mp),
-  ch4_µmolm2sec1           = rep(NA, goflux_total_mp),
-  co2_µmolm2sec1           = rep(NA, goflux_total_mp),
-  n2o_nmolm2sec1           = rep(NA, goflux_total_mp),
-  n2o_LMFlux               = rep(NA, goflux_total_mp),
-  n2o_HMFlux               = rep(NA, goflux_total_mp),
-  n2o_bestFlux             = rep(NA, goflux_total_mp),
-  n2o_bestModel            = rep(NA, goflux_total_mp)
 )
 
 # # remove invalid mp from metDataDF
@@ -67,22 +70,21 @@ for (goflux_row in 1:goflux_total_mp) {
   # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF
   if (! length(rowNum_metaDataDF) == 1)  stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
   if (! length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
-  
-  # goflux_best_n2o_flux_df << goFlux
-  goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row]   <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
-  goflux_data_metadata_df$n2o_LMFlux[goflux_row]       <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF]
-  goflux_data_metadata_df$n2o_HMFlux[goflux_row]       <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF]
-  goflux_data_metadata_df$n2o_bestFlux[goflux_row]     <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
-  goflux_data_metadata_df$n2o_bestModel[goflux_row]    <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF]
 
   # measPeriodMetaDataDF << metaData
   goflux_data_metadata_df$datetime[goflux_row]         <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start 
-  #goflux_data_metadata_df$datetime[goflux_row]         <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF], format = "%Y-%m-%d %H:%M:%S", tz="UTC")
   goflux_data_metadata_df$meas_ID[goflux_row]          <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF]
   goflux_data_metadata_df$plot_no[goflux_row]          <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF]
   goflux_data_metadata_df$micro_habitat[goflux_row]    <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF]
   goflux_data_metadata_df$light_dark[goflux_row]       <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF]
 
+  # goflux_best_n2o_flux_df << goFlux
+  goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row]   <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_LMFlux[goflux_row]       <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_HMFlux[goflux_row]       <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_bestFlux[goflux_row]     <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF]
+  goflux_data_metadata_df$n2o_bestModel[goflux_row]    <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF]
+
   # ppN2OdataDF << environmental data
   goflux_data_metadata_df$par[goflux_row]                       <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE)
   goflux_data_metadata_df$soil_moisture_12cm[goflux_row]        <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE)
-- 
GitLab


From efed52a8ed54b25e51b92d103e65b0e5655882a1 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 16:45:19 +0100
Subject: [PATCH 13/24] bugfix

---
 preprocessing/functions/combine_all_data_and_meta_data.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 2b7dd10..754cb12 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -36,7 +36,7 @@ goflux_data_metadata_df <- data.frame(
   n2o_LMFlux               = rep(NA, goflux_total_mp),
   n2o_HMFlux               = rep(NA, goflux_total_mp),
   n2o_bestFlux             = rep(NA, goflux_total_mp),
-  n2o_bestModel            = rep(NA, goflux_total_mp)
+  n2o_bestModel            = rep(NA, goflux_total_mp),
   # environmental data
   par                      = rep(NA, goflux_total_mp),
   soil_moisture_12cm       = rep(NA, goflux_total_mp),
@@ -44,7 +44,7 @@ goflux_data_metadata_df <- data.frame(
   soil_temperature_15cm_s1 = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s2 = rep(NA, goflux_total_mp),
   soil_temperature_15cm_s3 = rep(NA, goflux_total_mp),
-  soil_temperature_15cm_s4 = rep(NA, goflux_total_mp),
+  soil_temperature_15cm_s4 = rep(NA, goflux_total_mp)
 )
 
 # # remove invalid mp from metDataDF
-- 
GitLab


From c336cdc8d4c4bdd6e065c7c30397ea9f26b99e23 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Wed, 21 Feb 2024 16:54:52 +0100
Subject: [PATCH 14/24] bugfix

---
 preprocessing/functions/combine_all_data_and_meta_data.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 754cb12..ad06edf 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -68,8 +68,8 @@ for (goflux_row in 1:goflux_total_mp) {
   rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID              == gf_uID)
 
   # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF
-  if (! length(rowNum_metaDataDF) == 1)  stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
-  if (! length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
+  if (! length(rowNum_metaDataDF) == 1)  stop(paste0("ERROR! no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
+  if (length(rowNum_ppN2OdataDF) < 1)    stop(paste0("ERROR! no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
 
   # measPeriodMetaDataDF << metaData
   goflux_data_metadata_df$datetime[goflux_row]         <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start 
-- 
GitLab


From 4172d2cc3f702c9c73d5426de66d0f00d1689649 Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Thu, 22 Feb 2024 07:59:57 +0200
Subject: [PATCH 15/24] start visualise fluxes

* noticed two different .csv files, trying to figure out which one is the
correct one AND contains info about measCamp (May, July, Sept)
* currently, the one containing info about measCamp has ~400 fewer observations
---
 .../functions/visualise_fluxes_from_goflux.R  | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R
index b319355..7f62485 100644
--- a/preprocessing/functions/visualise_fluxes_from_goflux.R
+++ b/preprocessing/functions/visualise_fluxes_from_goflux.R
@@ -3,6 +3,26 @@
 #
 # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R'
 #
+# tests 22 Feb 2024
+goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv")
+
+# library(dplyr)
+# library(tidyverse)
+# measCampSpring <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-05")) %>%
+#   mutate(measCamp="May")
+# 
+# measCampSummer <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-07")) %>%
+#   mutate(measCamp="July")
+# 
+# measCampAutumn <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-09")) %>%
+#   mutate(measCamp="September")
+# 
+# # rejoin
+# test <- measCampSpring
+# test <- rbind(test, measCampSummer, measCampAutumn)
 
 
 # R base plots ---------------------------------------------------------------
-- 
GitLab


From 4197156fae0b780b6c7836ece7003be1a34bb708 Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Thu, 22 Feb 2024 10:14:34 +0200
Subject: [PATCH 16/24] continue visualising fluxes

* for now, I worked with the (...)_corr.csv df with only ~ 640 obs
* I will wait with testing the GC comparison and running all measCamp with shorter
chamber closure times
* next steps: make sure that code to combine dfs is really working and giving me
all my obs
---
 .../functions/visualise_fluxes_from_goflux.R  | 39 ++++++++++++-------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R
index 7f62485..1978c8a 100644
--- a/preprocessing/functions/visualise_fluxes_from_goflux.R
+++ b/preprocessing/functions/visualise_fluxes_from_goflux.R
@@ -4,7 +4,9 @@
 # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R'
 #
 # tests 22 Feb 2024
-goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv")
+goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata_corr.csv")
+str(goflux_data_metadata_df)
+goflux_data_metadata_df$datetime <- as.POSIXct(goflux_data_metadata_df$datetime)
 
 # library(dplyr)
 # library(tidyverse)
@@ -173,8 +175,8 @@ n2o_season_palsalichen <- goflux_data_metadata_df %>%
   labs(
     title = "Palsa lichen") +
   theme_bw() +
-  facet_wrap(vars(measCamp), scales = "free") +
-  My_Theme
+  facet_wrap(vars(measCamp), scales = "free_x") 
+  #My_Theme
 n2o_season_palsalichen
 
 # palsa moss 8####
@@ -193,11 +195,11 @@ n2o_season_pm8 <- goflux_data_metadata_df %>%
   labs(
     title = "Palsa moss") +
   theme_bw() +
-  facet_wrap(vars(measCamp), scales = "free") +
-  My_Theme
+  facet_wrap(vars(measCamp), scales = "free_x") 
+  # My_Theme
 n2o_season_pm8
 
-# palsa_moss -8 ####
+# palsa_moss -8 ----
 n2o_season_palsamoss <- goflux_data_metadata_df %>%
   filter(micro_habitat == "palsa_moss") %>%
   filter(plot_no != 8) %>%
@@ -213,11 +215,11 @@ n2o_season_palsamoss <- goflux_data_metadata_df %>%
   labs(
     title = "Palsa moss without plot 8") +
   theme_bw() +
-  facet_wrap(vars(measCamp), scales = "free") +
-  My_Theme
+  facet_wrap(vars(measCamp), scales = "free_x") 
+  #My_Theme
 n2o_season_palsamoss
 
-# N2O fluxes over season: bog plots 
+# N2O fluxes over season: bog plots ----
 n2o_season_bog <- goflux_data_metadata_df %>%
   filter(micro_habitat == "bog") %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -232,11 +234,11 @@ n2o_season_bog <- goflux_data_metadata_df %>%
   labs(
     title = "Bog") +
   theme_bw() +
-  facet_wrap(vars(measCamp), scales = "free") +
-  My_Theme
+  facet_wrap(vars(measCamp), scales = "free_x") 
+  #My_Theme
 n2o_season_bog
 
-# N2O fluxes over season: fen plots 
+# N2O fluxes over season: fen plots ----
 n2o_season_fen <- goflux_data_metadata_df %>%
   filter(micro_habitat == "fen") %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -251,11 +253,18 @@ n2o_season_fen <- goflux_data_metadata_df %>%
   labs(
     title = "Fen") +
   theme_bw() +
-  facet_wrap(vars(measCamp), scales = "free") +
-  My_Theme
+  facet_wrap(vars(measCamp), scales = "free_x") 
+  #
+  # My_Theme
 n2o_season_fen
 
-# Boxplots ####
+# install.packages("gridExtra")
+library(gridExtra)
+
+grid.arrange(n2o_season_fen, n2o_season_bog, n2o_season_palsamoss,  n2o_season_palsalichen)
+
+
+# BoxplogridExtra# Boxplots ####
 # L + D ####
 dark_light_season <- goflux_data_metadata_df %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
-- 
GitLab


From 398ca5bf60435af850c0e48bc42f673069a4cc6b Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Thu, 22 Feb 2024 13:20:00 +0200
Subject: [PATCH 17/24] fix measCamp issue in visualise_fluxes

* I just added the rows in simple base R to denote the measurement camp -->
no splitting and merging needed
* with this, I created a lot of plots and did some first correlation plots, too
* also some stats
* next steps: see if GC simulation runs
---
 .../functions/visualise_fluxes_from_goflux.R  | 189 ++++++++++++++----
 1 file changed, 147 insertions(+), 42 deletions(-)

diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R
index 1978c8a..a340000 100644
--- a/preprocessing/functions/visualise_fluxes_from_goflux.R
+++ b/preprocessing/functions/visualise_fluxes_from_goflux.R
@@ -3,28 +3,78 @@
 #
 # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R'
 #
-# tests 22 Feb 2024
-goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata_corr.csv")
-str(goflux_data_metadata_df)
+# read 22 Feb 2024 ----
+goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv")
+# add empty column to data frame
+goflux_data_metadata_df$measCamp <- NA
+# add measCamp using row numbers
+goflux_data_metadata_df$measCamp[1:662] <- "May"
+goflux_data_metadata_df$measCamp[663:789] <- "July"
+goflux_data_metadata_df$measCamp[790:1000] <- "September"
+# change datetime to POSIXct
 goflux_data_metadata_df$datetime <- as.POSIXct(goflux_data_metadata_df$datetime)
+# remove white space from micro habitats
+goflux_data_metadata_df$micro_habitat  <- gsub(" ", "", goflux_data_metadata_df$micro_habitat)
+# remove white space from light_dark
+goflux_data_metadata_df$light_dark     <- gsub(" ", "", goflux_data_metadata_df$light_dark)
+# change dak to dark 
+goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="dak")] <- "dark"
+# change palsa-moss to palsa_moss
+goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
+# look at structure 
+str(goflux_data_metadata_df)
 
-# library(dplyr)
-# library(tidyverse)
-# measCampSpring <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-05")) %>%
-#   mutate(measCamp="May")
-# 
-# measCampSummer <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-07")) %>%
-#   mutate(measCamp="July")
-# 
-# measCampAutumn <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-09")) %>%
-#   mutate(measCamp="September")
-# 
-# # rejoin
-# test <- measCampSpring
-# test <- rbind(test, measCampSummer, measCampAutumn)
+# STATS ---------------
+# standard correlation plots ----
+#install.packages("GGally")
+library(GGally)
+# create subset of df
+subset <- goflux_data_metadata_df %>%
+  select(datetime, plot_no, micro_habitat, 
+         n2o_nmolm2sec1, par, light_dark,
+         soil_moisture_12cm, soil_moisture_30cm,
+         soil_temperature_15cm_s1)
+# create correlation matrix
+plot <- subset %>%
+  ggpairs(mapping = aes(col = light_dark, alpha = 0.3),
+          lower = list(combo = wrap("facethist", bins = 20)))
+plot
+
+# create even smaller subset
+subsubset <- subset %>%
+  select(n2o_nmolm2sec1, light_dark, micro_habitat,
+         soil_moisture_12cm, soil_moisture_30cm, 
+         soil_temperature_15cm_s1)
+# create correlation matrix
+subplot <- subsubset %>%
+  ggpairs(mapping = aes(col = light_dark, alpha = 0.3),
+          lower = list(combo = wrap("facethist", bins = 20)))
+subplot
+
+# fancier correlation plots ----
+# install.packages("corrplot")  # one-time setup; run manually, not on every source()
+library(corrplot)
+# create subset of goflux_metadata_df with only numerical values
+num_values <- goflux_data_metadata_df %>%
+  select(plot_no, n2o_nmolm2sec1, par, soil_moisture_12cm, soil_moisture_30cm,
+         soil_temperature_15cm_s1)
+# calculate the correlation matrix and round it
+cor_matrix <- cor(num_values) 
+corrplot(cor_matrix, method="circle", type = "upper", cl.pos = "b", tl.pos = "d", tl.cex = 0.6)
+# nonsense for my data ? 
+
+# glimpse ----
+library(tidyr)
+# use gather() to gather columns into key-value pairs and then glimpse() at the resulting data
+# draw a bar plot of each variable
+gather(goflux_data_metadata_df) %>% ggplot(aes(value)) + facet_wrap("key", scales = "free") + geom_bar()
+# not super useful in my case
+
+# produce summary statistics by group
+summary_stats <- goflux_data_metadata_df %>% 
+  group_by(micro_habitat, light_dark) %>% 
+  summarise(count = n(), mean_grade = mean(n2o_nmolm2sec1))
+summary_stats
 
 
 # R base plots ---------------------------------------------------------------
@@ -127,9 +177,10 @@ symlog_trans <- function(base = 10, thr = 1, scale = 1){
 n2o_fluxes_seasons <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
-  ggplot(aes(x = datetime, y = n2o_nmolm2sec1, )) +
+  mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>%
+  ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = micro_habitat)) +
   geom_point(size = 2) +
-  scale_colour_manual(values = c("black", "orange")) +
+  scale_colour_manual(values = c("darkblue", "blue", "orange", "yellow")) +
   geom_smooth(method = "glm",  linewidth = 0.5, #formula = y ~ x,
               colour = "blue") +
   #ylim(-50,150) +
@@ -141,7 +192,7 @@ n2o_fluxes_seasons <- goflux_data_metadata_df %>%
   My_Theme
 n2o_fluxes_seasons
 
-# L + D -8 ####
+# LIGHT + DARK -8 ####
 n2o_fluxes_seasons2 <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -264,7 +315,7 @@ library(gridExtra)
 grid.arrange(n2o_season_fen, n2o_season_bog, n2o_season_palsamoss,  n2o_season_palsalichen)
 
 
-# BoxplogridExtra# Boxplots ####
+# boxplots ####
 # L + D ####
 dark_light_season <- goflux_data_metadata_df %>%
   mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
@@ -309,27 +360,32 @@ variability_transects
 # Spatial variability per measCamp####
 variability_measCamp <- goflux_data_metadata_df %>%
   filter(plot_no != 8) %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
   geom_boxplot() +
   geom_jitter(aes(colour = micro_habitat)) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) +
+  #scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
   labs(title = "Light and dark measurements without plot 8") +
   ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
   xlab("") +
-  facet_wrap(vars(measCamp)) +
-  My_Theme
+  facet_wrap(vars(measCamp)) 
+  #My_Theme
 # ylim(-0.001, 0.001) +
 # scale_y_continuous(trans ="symlog",limits=c(0,0.001)) 
 variability_measCamp
 
 # L per micro habitat ####
 light <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>%
   filter(plot_no != 8) %>%
   filter(light_dark == "light") %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
   geom_boxplot() +
   geom_jitter(aes(colour = micro_habitat)) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) +
   labs(title = "Light measurements without plot 8") +
   ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
   My_Theme
@@ -339,12 +395,14 @@ light
 
 # D per micro habitat ####
 dark <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>%
   filter(plot_no != 8) %>%
   filter(light_dark == "dark") %>%
   ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + 
   geom_boxplot() +
   geom_jitter(aes(colour = micro_habitat)) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) +
   labs(title = "Dark measurements without plot 8") +
   ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
   My_Theme
@@ -352,35 +410,58 @@ dark <- goflux_data_metadata_df %>%
 # scale_y_continuous(trans ="symlog",limits=c(0,0.001)) 
 dark
 
+# Soil T + M ---------------------------------------------
+tests <- goflux_data_metadata_df %>%
+  filter(measCamp == "May") %>%
+  group_by(plot_no) %>%
+  summarise(mean_flux = mean(n2o_nmolm2sec1)) %>%
+  ggplot(aes(x = plot_no, y = mean_flux)) +
+  geom_point()
+tests  
+
+# NOTE: template snippet kept for reference only; `df`, `age` and `score` are not defined in this script
+# df %>%
+#   group_by(age) %>%
+#   summarise(mean_score = mean(score)) %>%
+#   ggplot(aes(x = factor(age), y = mean_score)) +
+#   geom_col() +
+#   labs(x = "Age", y = "Mean score")
+
+  
+  
 # Soil temperature #### 
-# Soil T4 overall ####
+# Soil T4 overall per measCamp ####
 n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  #mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>%
   filter(plot_no != 8) %>%
-  ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat)) +
+  ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1)) +
   geom_point(size = 2) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
   geom_smooth(method = "glm",  linewidth = 0.5, #formula = y ~ x,
               colour = "blue") +
   #ylim(-0.001, 0.001) +
   xlab("Soil temperature (°C) in 15 cm depth") +
   ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
   theme_bw() +
+  facet_wrap(vars(measCamp), scales = "free_x") +
   My_Theme
 n2o_fluxes_soiltemp4
 
 # Soil T4 per micro habitat####
 n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>%
   filter(plot_no != 8) %>%
-  ggplot( aes(x = soil_temperature_15cm_s1, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) +
+  ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) +
   geom_point(size = 2) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) +
   geom_smooth(method = "glm",  linewidth = 0.5, #formula = y ~ x,
               colour = "blue") +
   #ylim(-0.001, 0.001) +
   xlab("Soil temperature (°C) in 15 cm depth") +
-  ylab(expression('N'['2']*'O' * ' flux ('* 'mg' ~N[2]*O-N~ m^-2~h^-1*')')) +
+  ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
   theme_bw() +
-  facet_wrap(vars(micro_habitat)) +
+  facet_wrap(vars(measCamp), scales = "free_x") +
   My_Theme
 n2o_fluxes_soiltemp4
 
@@ -388,10 +469,12 @@ n2o_fluxes_soiltemp4
 # Soil moisture ####
 # Soil M 12cm ####
 n2o_fluxes_soilmoist_12cm <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>%
   filter(plot_no != 8) %>%
-  ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat)) +
+  ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) +
   geom_point(size = 2) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) +
   geom_smooth(method = "glm",  linewidth = 0.5, #formula = y ~ x,
               colour = "blue") +
   #ylim(-0.001, 0.001) +
@@ -404,12 +487,14 @@ n2o_fluxes_soilmoist_12cm
 
 # Soil M 30 cm ####
 n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>%
   filter(plot_no != 8) %>%
-  filter(soil_moisture_30cm < 2000) %>% # remove some weird error values 
-  filter(soil_moisture_30cm > 0) %>% # remove 0s
-  ggplot( aes(x = soil_moisture_30cm, y = n2o_nmolm2sec1, colour = micro_habitat)) +
+  #filter(soil_moisture_30cm < 2000) %>% # remove some weird error values 
+  #filter(soil_moisture_30cm > 0) %>% # remove 0s
+  ggplot( aes(x = soil_moisture_30cm, y = n2o_nmolm2sec1, colour = micro_habitat, shape=micro_habitat)) +
   geom_point(size = 2) +
-  scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) +
   geom_smooth(method = "glm",  linewidth = 0.5, #formula = y ~ x,
               colour = "blue") +
   #ylim(-0.001, 0.001) +
@@ -420,3 +505,23 @@ n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>%
   My_Theme
 n2o_fluxes_soilmoist_30cm
 
+# Spatial variability within micro habitat -------
+# Spatial variability per measCamp####
+variability_fen <- goflux_data_metadata_df %>%
+  filter(light_dark == "light") %>%
+  filter(micro_habitat == "fen") %>%
+  mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>%
+  ggplot(aes(x = plot_no, y = n2o_nmolm2sec1, group = plot_no)) + 
+  geom_boxplot() +
+  #geom_jitter(aes(colour = micro_habitat)) +
+  #scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) +
+  #scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) +
+  labs(title = "Variability of light measurements within fen plots") +
+  ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) +
+  xlab("") +
+  facet_wrap(vars(measCamp)) +
+  My_Theme
+# ylim(-0.001, 0.001) +
+# scale_y_continuous(trans ="symlog",limits=c(0,0.001)) 
+variability_fen
+
-- 
GitLab


From e2cd1e521c0ecd736ff0ace53c6d1deff1896031 Mon Sep 17 00:00:00 2001
From: Nathalie Triches <ntriches@bgc-jena.mpg.de>
Date: Thu, 22 Feb 2024 16:58:14 +0200
Subject: [PATCH 18/24] test GCsimulation script: working!

* GC simulation script is working well :D
* next steps (as discussed on BBB): Jan will clean the combine_all_data_and_meta_data.R
script to make sure not too many mps are kicked out; take the last reading from the
soil T and soil M sensors as single average for these variables, and merge the branch
---
 .../combine_all_data_and_meta_data.R          | 58 +++++++++----------
 preprocessing/run_all_main.R                  |  2 +-
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index ad06edf..d58ec60 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -116,35 +116,35 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da
 # change palsa-moss to palsa_moss
 goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
 
-# remove values below -1000
-goflux_data_metadata_df <- goflux_data_metadata_df %>%
-  filter(n2o_nmolm2sec1 > -500)
-# remove any line that contains NA
-rowContainsNoNA <- complete.cases(goflux_data_metadata_df)
-goflux_data_metadata_df <- goflux_data_metadata_df %>%
-  filter(rowContainsNoNA)
-
-
-# create new DF - one per measCampaign
-# needs 'stringr' library
-#   selection based on 'datetime' column containing "year-month"
-#   and replace 'measCamp' column values with a string of the name of the month
-
-measCampSpring <- goflux_data_metadata_df %>%
-  filter(str_detect(datetime, "2023-05")) %>%
-  mutate(measCamp="May")
-
-measCampSummer <- goflux_data_metadata_df %>%
-  filter(str_detect(datetime, "2023-07")) %>%
-  mutate(measCamp="July")
-
-measCampAutumn <- goflux_data_metadata_df %>%
-  filter(str_detect(datetime, "2023-09")) %>%
-  mutate(measCamp="September")
-
-# combine data frames from all measurement campaigns to one data set
-goflux_data_metadata_df <- measCampSpring
-goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn)
+# # remove values below -1000
+# goflux_data_metadata_df <- goflux_data_metadata_df %>%
+#   filter(n2o_nmolm2sec1 > -500)
+# # remove any line that contains NA
+# rowContainsNoNA <- complete.cases(goflux_data_metadata_df)
+# goflux_data_metadata_df <- goflux_data_metadata_df %>%
+#   filter(rowContainsNoNA)
+# 
+# 
+# # create new DF - one per measCampaign
+# # needs 'stringr' library
+# #   selection based on 'datetime' column containing "year-month"
+# #   and replace 'measCamp' column values with a string of the name of the month
+# 
+# measCampSpring <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-05")) %>%
+#   mutate(measCamp="May")
+# 
+# measCampSummer <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-07")) %>%
+#   mutate(measCamp="July")
+# 
+# measCampAutumn <- goflux_data_metadata_df %>%
+#   filter(str_detect(datetime, "2023-09")) %>%
+#   mutate(measCamp="September")
+# 
+# # combine data frames from all measurement campaigns to one data set
+# goflux_data_metadata_df <- measCampSpring
+# goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn)
 
 # write corrected DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index ae47d21..05b5bb5 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -32,7 +32,7 @@ process_aeris_raw_data <- "F"
 create_plots_ghg_conc  <- "T"
 
 # use the PGA data to simulate GC data, and run only specific filter/correction functions
-simulateGCdata         <- "F"   # [F,T]
+simulateGCdata         <- "T"   # [F,T]
 
 # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R'
 updateGoFluxPkg        <- "T"   # [F,T]
-- 
GitLab


From fb81a1ffaecccdb616d0294eee1dcab00a023859 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 19:54:51 +0100
Subject: [PATCH 19/24] minor cleanup, changes and prep. for upcoming changes

---
 .../calculate_fluxes_with_goflux_package.R    |  3 +-
 .../combine_all_data_and_meta_data.R          | 30 -------------------
 preprocessing/run_all_main.R                  |  5 ++--
 3 files changed, 4 insertions(+), 34 deletions(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index 0d10072..bf6dcf3 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -23,8 +23,7 @@ goflux_best_n2o_flux_df   <- best.flux(flux.result = n2o_flux_result_df, g.limit
 # plot results of best fluxes
 plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb",
                        shoulder = 20,
-                       plot.legend = c("RMSE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot
-                       #plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"),
+                       plot.legend = c("MAE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot
                        best.model = TRUE)
 
 # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir')
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index d58ec60..dd5acbb 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -116,36 +116,6 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da
 # change palsa-moss to palsa_moss
 goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss"
 
-# # remove values below -1000
-# goflux_data_metadata_df <- goflux_data_metadata_df %>%
-#   filter(n2o_nmolm2sec1 > -500)
-# # remove any line that contains NA
-# rowContainsNoNA <- complete.cases(goflux_data_metadata_df)
-# goflux_data_metadata_df <- goflux_data_metadata_df %>%
-#   filter(rowContainsNoNA)
-# 
-# 
-# # create new DF - one per measCampaign
-# # needs 'stringr' library
-# #   selection based on 'datetime' column containing "year-month"
-# #   and replace 'measCamp' column values with a string of the name of the month
-# 
-# measCampSpring <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-05")) %>%
-#   mutate(measCamp="May")
-# 
-# measCampSummer <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-07")) %>%
-#   mutate(measCamp="July")
-# 
-# measCampAutumn <- goflux_data_metadata_df %>%
-#   filter(str_detect(datetime, "2023-09")) %>%
-#   mutate(measCamp="September")
-# 
-# # combine data frames from all measurement campaigns to one data set
-# goflux_data_metadata_df <- measCampSpring
-# goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn)
-
 # write corrected DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
             paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"),
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 05b5bb5..606d951 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -13,7 +13,8 @@
 # 1. Script settings ----
 
 # list of measurement campaigns to run [YYYYMM]
-measCampList <- c("202305", "202307", "202309")
+measCampList      <- c("202305", "202307", "202309")
+measCampMonthList <- c("May",    "July",   "September")
 
 # gas type to process [T,F]
 process_co2 <- "F"
@@ -32,7 +33,7 @@ process_aeris_raw_data <- "F"
 create_plots_ghg_conc  <- "T"
 
 # use the PGA data to simulate GC data, and run only specific filter/correction functions
-simulateGCdata         <- "T"   # [F,T]
+simulateGCdata         <- "F"   # [F,T]
 
 # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R'
 updateGoFluxPkg        <- "T"   # [F,T]
-- 
GitLab


From 5e0008189ec28ddece23e5767360b363ccfa62de Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 20:57:34 +0100
Subject: [PATCH 20/24] modify soil moisture & temperature calculation for
 results DF

* tested: works
---
 .../combine_all_data_and_meta_data.R          | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index dd5acbb..a07b031 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -7,10 +7,7 @@
 # output DF: goflux_data_metadata_df
 #
 #  colnames(goflux_data_metadata_df)
-#  [1] "datetime"                 "meas_ID"                  "plot_no"                  "micro_habitat"            "par"                      "light_dark"              
-#  [7] "soil_moisture_12cm"       "soil_moisture_30cm"       "soil_temperature_15cm_s1" "soil_temperature_15cm_s2" "soil_temperature_15cm_s3" "soil_temperature_15cm_s4"
-# [13] "ch4_µmolm2sec1"           "co2_µmolm2sec1"           "n2o_nmolm2sec1"           "n2o_LMFlux"               "n2o_HMFlux"               "n2o_bestFlux"            
-# [19] "n2o_bestModel"            "measCamp"                
+             
 
 
 # number of measurement periods (mp)
@@ -41,10 +38,7 @@ goflux_data_metadata_df <- data.frame(
   par                      = rep(NA, goflux_total_mp),
   soil_moisture_12cm       = rep(NA, goflux_total_mp),
   soil_moisture_30cm       = rep(NA, goflux_total_mp),
-  soil_temperature_15cm_s1 = rep(NA, goflux_total_mp),
-  soil_temperature_15cm_s2 = rep(NA, goflux_total_mp),
-  soil_temperature_15cm_s3 = rep(NA, goflux_total_mp),
-  soil_temperature_15cm_s4 = rep(NA, goflux_total_mp)
+  soil_temperature_15cm_sm = rep(NA, goflux_total_mp)     # median of values from four sensors
 )
 
 # # remove invalid mp from metDataDF
@@ -64,12 +58,16 @@ for (goflux_row in 1:goflux_total_mp) {
 
   # get the rows of goFlux output and metDataDF corresponding with the gf_uID
   rowNum_goFluxDF    <- goflux_row
-  rowNum_metaDataDF  <- which(measPeriodMetaDataDF$meas_ID     == gf_uID)
-  rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID              == gf_uID)
+  rowNum_metaDataDF  <- which(measPeriodMetaDataDF$meas_ID == gf_uID)
+
+  # get the last x rows of this mp in the pre-processed DF (environmental data); tail() returns all rows if the mp has fewer than x
+  allRows_ppN2OdataDF       <- which(ppN2OdataDF$meas_ID == gf_uID)
+  numberOfLastRowsToInclude <- 10
+  selRows_ppN2OdataDF       <- tail(allRows_ppN2OdataDF, numberOfLastRowsToInclude)
 
   # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF
   if (! length(rowNum_metaDataDF) == 1)  stop(paste0("ERROR! no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
-  if (length(rowNum_ppN2OdataDF) < 1)    stop(paste0("ERROR! no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
+  if (length(allRows_ppN2OdataDF) < 1)   stop(paste0("ERROR! no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID))
 
   # measPeriodMetaDataDF << metaData
   goflux_data_metadata_df$datetime[goflux_row]         <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start 
@@ -86,14 +84,17 @@ for (goflux_row in 1:goflux_total_mp) {
   goflux_data_metadata_df$n2o_bestModel[goflux_row]    <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF]
 
   # ppN2OdataDF << environmental data
-  goflux_data_metadata_df$par[goflux_row]                       <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_moisture_12cm[goflux_row]        <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_moisture_30cm[goflux_row]        <- median(ppN2OdataDF$vwc30[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_temperature_15cm_s1[goflux_row]  <- median(ppN2OdataDF$soilT1degC[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_temperature_15cm_s2[goflux_row]  <- median(ppN2OdataDF$soilT2degC[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_temperature_15cm_s3[goflux_row]  <- median(ppN2OdataDF$soilT3degC[rowNum_ppN2OdataDF], na.rm = FALSE)
-  goflux_data_metadata_df$soil_temperature_15cm_s4[goflux_row]  <- median(ppN2OdataDF$soilT4degC[rowNum_ppN2OdataDF], na.rm = FALSE)    
-
+  #   use all PAR values
+  goflux_data_metadata_df$par[goflux_row]                       <- median(ppN2OdataDF$par[allRows_ppN2OdataDF], na.rm = TRUE)
+  #   use the last x values of the mp
+  goflux_data_metadata_df$soil_moisture_12cm[goflux_row]        <- median(ppN2OdataDF$vwc12[selRows_ppN2OdataDF], na.rm = TRUE)
+  goflux_data_metadata_df$soil_moisture_30cm[goflux_row]        <- median(ppN2OdataDF$vwc30[selRows_ppN2OdataDF], na.rm = TRUE)
+  #   use the last x values of the mp and take the median across all four sensors
+  goflux_data_metadata_df$soil_temperature_15cm_sm[goflux_row]  <- median(c(ppN2OdataDF$soilT1degC[selRows_ppN2OdataDF],
+                                                                            ppN2OdataDF$soilT2degC[selRows_ppN2OdataDF],
+                                                                            ppN2OdataDF$soilT3degC[selRows_ppN2OdataDF],
+                                                                            ppN2OdataDF$soilT4degC[selRows_ppN2OdataDF]),
+                                                                          na.rm = TRUE)
 }
 
 # write DF goflux_data_metadata_df
-- 
GitLab


From b73783191b73a2048469a502c117b8c39f9fe10e Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 21:04:02 +0100
Subject: [PATCH 21/24] rename four flux output file names for consistency and
 improved readability

---
 .../functions/calculate_fluxes_with_goflux_package.R          | 4 ++--
 preprocessing/functions/combine_all_data_and_meta_data.R      | 4 ++--
 preprocessing/run_all_main.R                                  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index bf6dcf3..c30c091 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -33,8 +33,8 @@ setwd(mainDir)
 
 # save output file from flux calculations
 write.table(n2o_flux_result_df,
-            paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "goflux_goflux_n2o_output__mc", measCampListString, ".csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
 write.table(goflux_best_n2o_flux_df,
-            paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_bestflux_n2o_output.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "goflux_bestflux_n2o_output__mc", measCampListString, ".csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index a07b031..29e11e7 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -99,7 +99,7 @@ for (goflux_row in 1:goflux_total_mp) {
 
 # write DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata__mc", measCampListString, ".csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
@@ -119,7 +119,7 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita
 
 # write corrected DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 606d951..6b1fe80 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -48,7 +48,7 @@ updateGoFluxPkg        <- "T"   # [F,T]
 # create string with all measCamp
 measCampListString <- character()
 for (mc in measCampList){
-  measCampListString <- paste0(measCampListString, mc, "_")
+  measCampListString <- paste0(measCampListString, "_", mc)
 }
 
 # set dirs
-- 
GitLab


From 4db43cb7d65773f4c27cf1df551c7a6f667dc1f3 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 21:12:21 +0100
Subject: [PATCH 22/24] minor changes in output file names

---
 .../functions/calculate_fluxes_with_goflux_package.R          | 4 ++--
 preprocessing/functions/combine_all_data_and_meta_data.R      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
index c30c091..a5c76df 100644
--- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R
+++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R
@@ -33,8 +33,8 @@ setwd(mainDir)
 
 # save output file from flux calculations
 write.table(n2o_flux_result_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "goflux_goflux_n2o_output__mc", measCampListString, ".csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "__goflux_goflux_n2o_output__mc", measCampListString, ".csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
 write.table(goflux_best_n2o_flux_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "goflux_bestflux_n2o_output__mc", measCampListString, ".csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "__goflux_bestflux_n2o_output__mc", measCampListString, ".csv"),
             row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA")
diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 29e11e7..4ef6b64 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -99,7 +99,7 @@ for (goflux_row in 1:goflux_total_mp) {
 
 # write DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata__mc", measCampListString, ".csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "__goflux_n2o_data_metadata__mc", measCampListString, ".csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
@@ -119,7 +119,7 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita
 
 # write corrected DF goflux_data_metadata_df
 write.table(goflux_data_metadata_df,
-            paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"),
+            paste0(fluxResOutDir, "/", currentDateTime, "__goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"),
             row.names = FALSE,
             quote = FALSE,
             sep = ",")
-- 
GitLab


From 82f9a2acacae71aa8517045bfa284b7220abff85 Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 21:34:51 +0100
Subject: [PATCH 23/24] add measCamp and the name of the month of the measCamp
 to metaDataDF and flux output DF

* not yet tested
---
 .../combine_all_data_and_meta_data.R          |  4 ++++
 preprocessing/main_data_preprocessing.R       | 19 +++++++++++++------
 preprocessing/run_all_main.R                  |  7 ++++++-
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R
index 4ef6b64..2162e48 100644
--- a/preprocessing/functions/combine_all_data_and_meta_data.R
+++ b/preprocessing/functions/combine_all_data_and_meta_data.R
@@ -23,6 +23,8 @@ goflux_data_metadata_df <- data.frame(
   # metaData
   datetime                 = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ),
   meas_ID                  = rep(NA, goflux_total_mp),
+  meas_camp                = rep(NA, goflux_total_mp),
+  meas_camp_month          = rep(NA, goflux_total_mp),
   plot_no                  = rep(NA, goflux_total_mp),
   micro_habitat            = rep(NA, goflux_total_mp),
   light_dark               = rep(NA, goflux_total_mp),
@@ -72,6 +74,8 @@ for (goflux_row in 1:goflux_total_mp) {
   # measPeriodMetaDataDF << metaData
   goflux_data_metadata_df$datetime[goflux_row]         <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start 
   goflux_data_metadata_df$meas_ID[goflux_row]          <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF]
+  goflux_data_metadata_df$meas_camp[goflux_row]        <- measPeriodMetaDataDF$meas_camp[rowNum_metaDataDF]
+  goflux_data_metadata_df$meas_camp_month[goflux_row]  <- measPeriodMetaDataDF$meas_camp_month[rowNum_metaDataDF]
   goflux_data_metadata_df$plot_no[goflux_row]          <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF]
   goflux_data_metadata_df$micro_habitat[goflux_row]    <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF]
   goflux_data_metadata_df$light_dark[goflux_row]       <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF]
diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R
index 1142a75..2e3ef8d 100644
--- a/preprocessing/main_data_preprocessing.R
+++ b/preprocessing/main_data_preprocessing.R
@@ -25,6 +25,7 @@ f_main_data_preprocessing <- function(username,
                                       outDir,
                                       currentDateTime,
                                       measCamp,
+                                      measCampMonth,
                                       expYear){
 
 # print info
@@ -120,7 +121,13 @@ if (expYear == "2022"){
 }
 
 
-# 4 Plot GHG concentrations ----
+# 4 Add two new columns to metaDataDF: the measCamp ID and the name of its month ----
+  # e.g., meas_camp = "202305" and meas_camp_month = "May"
+  measPeriodMetaDataDF$meas_camp       <- c(rep(measCamp,      nrow(measPeriodMetaDataDF)))
+  measPeriodMetaDataDF$meas_camp_month <- c(rep(measCampMonth, nrow(measPeriodMetaDataDF)))
+
+
+# 5 Plot GHG concentrations ----
 # needed for data quality control during field campaign
 # output: concentration / time during chamber closure (measurement period)
 
@@ -139,7 +146,7 @@ if (create_plots_ghg_conc == "T"){
 }
 
 
-# 5. Correct measurement data per measurement period and gas ----
+# 6. Correct measurement data per measurement period and gas ----
 
 # info
 print("start correcting and filter measurement periods of N2O and associated environmental data")
@@ -177,7 +184,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){
 rownames(measPeriodN2oCorr) <- seq(1:nrow(measPeriodN2oCorr))
 
 
-# 5.1 Plot figures comparing data pre and post data-correction ----
+# 6.1 Plot figures comparing data pre and post data-correction ----
 
 source("functions/plot_ghg_conc_pre_post_mp_correction.R")
 if (process_n2o == "T"){
@@ -189,7 +196,7 @@ if (process_n2o == "T"){
 }
 
 
-# 5.2 Add new column "time_since_mp_start" to DF ----
+# 6.2 Add new column "time_since_mp_start" to DF ----
 # needed for flux calculation with goFlux package 
 
 # run this only after the measPeriod data was modified / corrected !
@@ -208,7 +215,7 @@ if (process_n2o == "T"){
                                                           gas_ID$n2o_aeris)
 }
 
-# 6. Sample gas concentration data for GC simulation ----
+# 7. Sample gas concentration data for GC simulation ----
 if (simulateGCdata == "T"){
   print("")
   print(">> start sampling data for GC simulation")
@@ -290,7 +297,7 @@ if (simulateGCdata == "T"){
   rownames(simGCsamplesDF) <- seq(1:nrow(simGCsamplesDF))
 }
 
-# 7. Create DF with only valid measurement periods ----
+# 8. Create DF with only valid measurement periods ----
 print("")
 print(">> create DF with only valid measurement periods")
 
diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index 6b1fe80..a5cf4ed 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -132,7 +132,11 @@ ppN2OdataDF           <- data.frame()
 measPeriodMetaDataDF  <- data.frame()
 
 # loop over measCampList
-for (measCamp in measCampList){
+for (mc in 1:length(measCampList)){
+
+  # get measCamp YYYYMM and name of the month
+  measCamp      <- measCampList[mc]
+  measCampMonth <- measCampMonthList[mc]
 
   # read raw data from Aeris output file
   if (measCamp == "202307"){
@@ -159,6 +163,7 @@ for (measCamp in measCampList){
                                         outDir,
                                         currentDateTime,
                                         measCamp,
+                                        measCampMonth,
                                         expYear)
 
   # extract the two DF from list
-- 
GitLab


From e9e9e9f0e85d19b6f95e45702fd88d15d059933c Mon Sep 17 00:00:00 2001
From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de>
Date: Fri, 23 Feb 2024 22:07:12 +0100
Subject: [PATCH 24/24] minor improvement of documentation

---
 preprocessing/run_all_main.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R
index a5cf4ed..1ad420f 100644
--- a/preprocessing/run_all_main.R
+++ b/preprocessing/run_all_main.R
@@ -12,9 +12,9 @@
 
 # 1. Script settings ----
 
-# list of measurement campaigns to run [YYYYMM]
+# list of measurement campaigns to run [YYYYMM] and the matching month names [month]
 measCampList      <- c("202305", "202307", "202309")
-measCampMonthList <- c("May",    "July",   "September")
+measCampMonthList <- c("May",    "July",   "September")    # same order and length as 'measCampList' (matched by position)
 
 # gas type to process [T,F]
 process_co2 <- "F"
-- 
GitLab