From d8ebb98ae0b94397f971f68753340aa131afc7a1 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Mon, 19 Feb 2024 15:30:36 +0100 Subject: [PATCH 01/24] multiple changes in code, new files, splitting the main script, new run_all_main.R * the pre-processing code needed some re-work, as it is now called in a function and the "global environment" does not work anymore * testing: * data preprocessing runs for 202307 * with the two modes: normal & simGC * next step: * modify measPeriod ID * run pre-processing for all 2023 measCamp --- preprocessing/functions/plot_ghg_conc_2023.R | 42 +-- preprocessing/functions/read_raw_data_2023.R | 10 +- .../functions/sort_n2o_conc_by_meas_period.R | 9 +- .../sort_n2o_conc_by_meas_period_2023.R | 26 +- ...ain_script.R => main_data_preprocessing.R} | 263 +++++------------- preprocessing/main_flux_analysis.R | 27 ++ preprocessing/main_flux_calculation.R | 32 +++ preprocessing/run_all_main.R | 148 ++++++++++ preprocessing/run_main_script.sh | 27 -- 9 files changed, 325 insertions(+), 259 deletions(-) rename preprocessing/{main_script.R => main_data_preprocessing.R} (62%) create mode 100644 preprocessing/main_flux_analysis.R create mode 100644 preprocessing/main_flux_calculation.R create mode 100644 preprocessing/run_all_main.R delete mode 100755 preprocessing/run_main_script.sh diff --git a/preprocessing/functions/plot_ghg_conc_2023.R b/preprocessing/functions/plot_ghg_conc_2023.R index 12fa6e0..73d764c 100644 --- a/preprocessing/functions/plot_ghg_conc_2023.R +++ b/preprocessing/functions/plot_ghg_conc_2023.R @@ -112,7 +112,6 @@ for (i in unique(meas_ID)){ ################################################################################ ################# Plots for N2O ################################################ ################################################################################ -if (process_n2o == "T"){ # # enable plot comparison (copy-paste from 20230414_simple_habitat_comparison) # # code 
needs to be cleaned! @@ -234,22 +233,32 @@ if (process_n2o == "T"){ -# non-modified measurement periods (5 min) -meas_ID <- measPeriodN2o$meas_ID +# plot non-modified measurement periods (5 min) +f_plot_ghg_conc_2023_n2o_non_mod_mp <- function(process_n2o, + figsGasConcDir, + expYear, + measPeriodN2o){ -for (i in unique(meas_ID)){ - fileID <- i - d <- subset(measPeriodN2o, meas_ID == i) - png(filename=paste0(figsGasConcDir, "/", expYear, "/n2o_mp_", fileID, ".png"), - height=4, width=8, pointsize=8, bg="white", units="in", res=200) - par(mfrow = c(1,2)) - plot(d$datetime_datalogger, d$n2oppm_aeris) - title(main = i) - # hist(d$n2oppm_aeris, breaks = 15) - boxplot(d$n2oppm_aeris) - #plot(d$airTdegC) - par(mfrow = c(1,1)) - dev.off() + print(">> plot GHG concentrations N2O") + + if (process_n2o == "T"){ + meas_ID <- measPeriodN2o$meas_ID + + for (i in unique(meas_ID)){ + fileID <- i + d <- subset(measPeriodN2o, meas_ID == i) + png(filename=paste0(figsGasConcDir, "/", expYear, "/n2o_mp_", fileID, ".png"), + height=4, width=8, pointsize=8, bg="white", units="in", res=200) + par(mfrow = c(1,2)) + plot(d$datetime_datalogger, d$n2oppm_aeris) + title(main = i) + # hist(d$n2oppm_aeris, breaks = 15) + boxplot(d$n2oppm_aeris) + #plot(d$airTdegC) + par(mfrow = c(1,1)) + dev.off() + } + } } # # modified measurement periods: -20 seconds in the start, -10 seconds in the end @@ -271,4 +280,3 @@ for (i in unique(meas_ID)){ # dev.off() # } -} # if (process_n2o == "T") diff --git a/preprocessing/functions/read_raw_data_2023.R b/preprocessing/functions/read_raw_data_2023.R index 9417001..0ce45f1 100644 --- a/preprocessing/functions/read_raw_data_2023.R +++ b/preprocessing/functions/read_raw_data_2023.R @@ -46,7 +46,7 @@ get_datalogger_raw_data <- function(mainDir, dataDir, measCamp){ ) setwd(mainDir) - print("end get_datalogger_raw_data") + print(" end get_datalogger_raw_data") return(datalogger_data_polished) } else { @@ -69,7 +69,7 @@ get_datalogger_raw_data <- function(mainDir, 
dataDir, measCamp){ ) setwd(mainDir) - print("end get_datalogger_raw_data") + print(" end get_datalogger_raw_data") return(datalogger_data_polished) } } @@ -139,7 +139,7 @@ get_aeris_raw_data <- function(mainDir, dataDir, measCamp){ # "%m/%d/%Y %H:%M:%OS %Z", tz="UTC") setwd(mainDir) - print("end get_aeris_raw_data") + print(" end get_aeris_raw_data") return(aeris_data_polished) } @@ -193,7 +193,7 @@ get_field_mp_meta_data_DF <- function(mainDir, dataDir, measCamp){ select(c("datetime_start","datetime_end"), everything()) setwd(mainDir) - print("end get_field_mp_meta_data_DF") + print(" end get_field_mp_meta_data_DF") return(field_mp_meta_data_DF_polished) } else { @@ -238,7 +238,7 @@ get_field_mp_meta_data_DF <- function(mainDir, dataDir, measCamp){ select(c("datetime_start","datetime_end"), everything()) setwd(mainDir) - print("end get_field_mp_meta_data_DF") + print(" end get_field_mp_meta_data_DF") return(field_mp_meta_data_DF_polished) } diff --git a/preprocessing/functions/sort_n2o_conc_by_meas_period.R b/preprocessing/functions/sort_n2o_conc_by_meas_period.R index ac1aa1a..f7373c2 100644 --- a/preprocessing/functions/sort_n2o_conc_by_meas_period.R +++ b/preprocessing/functions/sort_n2o_conc_by_meas_period.R @@ -7,10 +7,13 @@ # one chamber closure time is called one measurement period (short: mp) -create_n2o_data_sorted_by_measurements <- function(mainDir, dataDir, expYear){ +create_n2o_data_sorted_by_measurements <- function(mainDir, dataDir, expYear, + measPeriodMetaDataDF, + ch4_co2_n2o_conc_st_sm_par_data, + gasID){ print("start create_n2o_conc_sorted_by_measurements") - + # create empty (new) data frame for collection of all measurement periods allMeasPeriodDF <- data.frame() @@ -85,7 +88,7 @@ create_n2o_data_sorted_by_measurements <- function(mainDir, dataDir, expYear){ # create vector with same amount of rows as measured gas concentrations # and all rows containing the mp ID & gas ID for the measurement period vec_meas_ID <- 
rep(measPeriodMetaDataDF$meas_ID[i_mp], nrow(meas_period_DF)) - vec_gas_ID <- rep(gas_ID$n2o_aeris, nrow(meas_period_DF)) + vec_gas_ID <- rep(gasID, nrow(meas_period_DF)) # add new columns to meas_period_DF meas_period_DF["meas_ID"] <- vec_meas_ID meas_period_DF["gas_ID"] <- vec_gas_ID diff --git a/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R b/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R index 72b5ac8..9a26e85 100644 --- a/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R +++ b/preprocessing/functions/sort_n2o_conc_by_meas_period_2023.R @@ -7,10 +7,13 @@ # one chamber closure time is called one measurement period (short: mp) -create_n2o_conc_sorted_by_measurements <- function(mainDir, dataDir, expYear){ - - print("start create_n2o_conc_sorted_by_mp") +f_create_n2o_conc_sorted_by_measurements <- function(field_mp_meta_data_DF, + measPeriodMetaDataDF, + ch4_co2_n2o_conc_st_sm_par_data, + gasID){ + print("start f_create_n2o_conc_sorted_by_measurements") + # create empty (new) data frame for collection of all measurement periods allMeasPeriodDF <- data.frame() @@ -94,20 +97,19 @@ create_n2o_conc_sorted_by_measurements <- function(mainDir, dataDir, expYear){ # get the ch4 concentrations of the particular measurement period # = start at datetime_start, end at datetime_end + all values in between meas_period_DF <- fluxMetaLicor[first_row:last_row, ] - + # add two new columns to meas_period_DF: ID of measurement period and ID of gas # create vector with same amount of rows as measured gas concentrations # and all rows containing the mp ID & gas ID for the measurement period vec_meas_ID <- rep(measPeriodMetaDataDF$meas_ID[i_mp], nrow(meas_period_DF)) - vec_gas_ID <- rep(gas_ID$is_n2o_aeris, nrow(meas_period_DF)) + vec_gas_ID <- rep(gasID, nrow(meas_period_DF)) # add new columns to meas_period_DF meas_period_DF["meas_ID"] <- vec_meas_ID meas_period_DF["gas_ID"] <- vec_gas_ID - + # set gas_ID in measPeriodMetaDataDF - 
measPeriodMetaDataDF$is_n2o_aeris[i_mp] <<- 1 - # (1 == TRUE, no gas_ID); "<<-" means: write to global object, e.g., DF, within function - + measPeriodMetaDataDF$is_n2o_aeris[i_mp] <- 1 + # screen output print(paste0("mp meas_ID & gas_ID: ", vec_meas_ID[1], " ", vec_gas_ID[1], " | median of N2O concentration: ", median(meas_period_DF$n2oppm_aeris))) @@ -117,8 +119,10 @@ create_n2o_conc_sorted_by_measurements <- function(mainDir, dataDir, expYear){ allMeasPeriodDF <- rbind(allMeasPeriodDF, meas_period_DF) } - print("end create_n2o_conc_sorted_by_mp") - return(allMeasPeriodDF) + print(" end create_n2o_conc_sorted_by_mp") + + # return metaData and data DF + return(list(measPeriodMetaDataDF, allMeasPeriodDF)) } diff --git a/preprocessing/main_script.R b/preprocessing/main_data_preprocessing.R similarity index 62% rename from preprocessing/main_script.R rename to preprocessing/main_data_preprocessing.R index 7785002..da54b80 100644 --- a/preprocessing/main_script.R +++ b/preprocessing/main_data_preprocessing.R @@ -1,106 +1,37 @@ -# Main script for processing data -# Use: This script is the main script used to automatically process the data -# gathered in the field with a data logger and portable gas analysers (GA's) +# +# Main script for pre-processing data +# Use: This script is the main script used to automatically pre-process the data +# gathered in the field with a data logger and portable gas analysers (GA's) +# # 2022: the used GA's are the LICOR Li-7810 for CH2/CH4/H2O concentrations and the AERIS MIRA Ultra N2O/CO2/H2O for N2O conc +# 2023: +# 2024: -# Aim: Automatic processing + avoid manual (pre)processing of data in multiple spreadsheets # =============================================================================================== # -# script settings ---- +# ------------------------------- Data pre-processing ------------------------------------------- # =============================================================================================== # -# 
experiment yaer [YYYY] and month [MM] -# get arguments from 'preprocessing/run_main_script.sh' -args <- commandArgs(TRUE) -if (length(args) > 0){ - # if arguments are provided (by running './run_main_script.sh') - expYear <-args[1] - expMonth <-args[2] -} else { - # if no arguments are provided (just running this main_script.R) - expYear <- "2023" - expMonth <- "07" -} -# measurement campaign [YYYYMM] -measCamp <- paste0(expYear, expMonth) -# gas type to process [T,F] -process_co2 <- "F" -process_ch4 <- "F" -process_n2o <- "T" -# how to calculate gas fluxes -calc_fluxes_with_gasfluxes_package <- "F" -calc_fluxes_with_goflux_package <- "T" - -# process aeris raw data (depending on measurement campaign) [T,F] -# FALSE by default -process_aeris_raw_data <- "F" -# set to TRUE for individual measurement campaigns -if (measCamp == "202307") process_aeris_raw_data <- "T" ## read raw data from Aeris output file - -# plot figures [T,F] -create_plots_ghg_conc <- "T" - -# use the PGA data to simulate GC data, and run only specific filter/correction functions -simulateGCdata <- "F" # [F,T] - - -# =============================================================================================== # -# init & get main variables & settings, set directories, change into main dir, load R libraries ---- -# =============================================================================================== # - -# set dirs -# main working directory (version control with gitlab) -# data dir (files with field data) -# differentiate between users -username <- Sys.getenv("USER") -if (username == "ntriches"){ - # Nathalie - mainDir <- "/home/ntriches/git_repo/data-analysis/preprocessing" - dataDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data" - figsGasConcDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing" - outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" -} else if (username == "jengel"){ - # Jan - mainDir <- 
"/home/jengel/repo_git/nathi_data-analysis/preprocessing" - dataDir <- "/home/jengel/nathalie/20231210-field-data" - figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" - outDir <- "/home/jengel/nathalie/20231210-script-output" -} else { - # error catching - stop("ERROR no valid username") -} - -# get date & time (e.g.: '20231211_195800') -currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S") - -# change into main dir -setwd(mainDir) +f_main_data_preprocessing <- function(username, + process_co2, + process_ch4, + process_n2o, + create_plots_ghg_conc, + simulateGCdata, + mainDir, + dataDir, + figsGasConcDir, + outDir, + currentDateTime, + measCamp, + expYear){ + +# print info +print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp, expYear)) +print("") -# get additional data needed for flux calculation +# get the gas IDs source("additional_info/gas_id.txt") -source("additional_info/fixed_values.txt") - -# load libraries -# optional install (if not already installed) and load dplyr package -# if(!require(dplyr)){install.packages('dplyr')} -library(dplyr) -#if(!require(tidyverse)){install.packages('tidyverse')} -library(tidyverse) - -if (calc_fluxes_with_gasfluxes_package == "T"){ - library(gasfluxes) -} -if (calc_fluxes_with_goflux_package == "T"){ - # TODO move from functions/calculate_fluxes_with_gofluxyourself_package.R to here -} - -# basic R settings -options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits - - -# =============================================================================================== # -# ------------------------------- Start data pre-processing ------------------------------------------- -# =============================================================================================== # # 1. 
Import all raw data (measurements and meta-data) ---- if (expYear == "2022"){ @@ -112,7 +43,7 @@ if (expYear == "2022"){ # get measurement data # ch4 = methane, co2 = carbon dioxide, conc = concentration, # st = soil temperature, sm = soil moisture, n2o = nitrous oxide - ch4_co2_n2o_conc_st_sm_par_data <- get_datalogger_raw_data(mainDir, dataDir, measCamp) + ch4_co2_n2o_conc_st_sm_par_data <- get_datalogger_raw_data(mainDir, dataDir, measCamp) if (process_aeris_raw_data == "T"){ co2_n2o_conc_data <- get_aeris_raw_data(mainDir, dataDir, measCamp) } @@ -125,30 +56,35 @@ if (expYear == "2022"){ source("functions/create_meas_period_metadata_df.R") measPeriodMetaDataDF <- create_measPeriodMetaDataDF(expYear, field_mp_meta_data_DF) - # 3. Sort measurement data per measurement period and gas ---- #--- 2022 ----------------------------------------------------------------------- if (expYear == "2022"){ - if (process_co2 == "T"){ - source("functions/sort_co2_conc_by_meas_period.R") - # create DF with licor CO2 measured data, sorted by meas_ID - # uses: measPeriodMetaDataDF - measPeriodCo2 <- create_co2_conc_sorted_by_mp(mainDir, dataDir, expYear) - } - - if (process_ch4 == "T"){ - source("functions/sort_ch4_conc_by_meas_period.R") - # create DF with licor CH4 measured data, sorted by meas_ID - # uses: measPeriodMetaDataDF - measPeriodCh4 <- create_ch4_conc_sorted_by_mp(mainDir, dataDir, expYear) - } + # if (process_co2 == "T"){ + # source("functions/sort_co2_conc_by_meas_period.R") + # # create DF with licor CO2 measured data, sorted by meas_ID + # # uses: measPeriodMetaDataDF + # measPeriodCo2 <- create_co2_conc_sorted_by_mp(mainDir, dataDir, expYear, + # measPeriodMetaDataDF, + # ch4_co2_n2o_conc_st_sm_par_data) + # } + + # if (process_ch4 == "T"){ + # source("functions/sort_ch4_conc_by_meas_period.R") + # # create DF with licor CH4 measured data, sorted by meas_ID + # # uses: measPeriodMetaDataDF + # measPeriodCh4 <- create_ch4_conc_sorted_by_mp(mainDir, dataDir, expYear, 
+ # measPeriodMetaDataDF, + # ch4_co2_n2o_conc_st_sm_par_data) + # } if (process_n2o == "T"){ source("functions/sort_n2o_conc_by_meas_period.R") # create DF with Aeris N2O measured data, sorted by meas_ID - # uses: measPeriodMetaDataDF - measPeriodN2o <- create_n2o_data_sorted_by_measurements(mainDir, dataDir, expYear) + measPeriodN2o <- create_n2o_data_sorted_by_measurements(mainDir, dataDir, expYear, + measPeriodMetaDataDF, + ch4_co2_n2o_conc_st_sm_par_data, + gas_ID$n2o_aeris) } #--- 2023 ----------------------------------------------------------------------- } else if (expYear == "2023"){ @@ -169,8 +105,15 @@ if (expYear == "2022"){ if (process_n2o == "T"){ source("functions/sort_n2o_conc_by_meas_period_2023.R") # create DF with Aeris N2O measured data, sorted by meas_ID - # uses: measPeriodMetaDataDF - measPeriodN2o <- create_n2o_conc_sorted_by_measurements(mainDir, dataDir, expYear) + list_hlp <- f_create_n2o_conc_sorted_by_measurements(field_mp_meta_data_DF, + measPeriodMetaDataDF, + ch4_co2_n2o_conc_st_sm_par_data, + gas_ID$n2o_aeris) + + # extract the two DF from list + measPeriodN2o <- data.frame() + measPeriodMetaDataDF <- as.data.frame(list_hlp[[1]]) + measPeriodN2o <- as.data.frame(list_hlp[[2]]) } } @@ -186,6 +129,10 @@ if (create_plots_ghg_conc == "T"){ #--- 2023 ----------------------------------------------------------------------- } else if (expYear == "2023"){ source("functions/plot_ghg_conc_2023.R") + f_plot_ghg_conc_2023_n2o_non_mod_mp(process_n2o, + figsGasConcDir, + expYear, + measPeriodN2o) } } @@ -264,9 +211,10 @@ if (simulateGCdata == "T"){ print(">> start sampling data for GC simulation") # gas-concentration data sampling for simulation of GC (gas chromatograph) - # this makes available the "additional_info/simulate_gc_n2o_data.txt" source("functions/sample_gas_concentrations_for_gc_simulation_per_mp.R") - + # load the info file for simulateGCdata + source("additional_info/simulate_gc_n2o_data.txt") + # create new empty DF for sampled 
data simGCsamplesDF <- data.frame() @@ -382,85 +330,8 @@ if (length(rowsAllInvalidMP) > 0){ print(" did not find any invalid mp") } -# 8. Pass data DF to new DF used for calculations ---- - -# create DF used with analysis and calculations -ppN2OdataDF <- selValidMPoutputDF - -# =============================================================================================== # -# ------------------------------- End data pre-processing ----------------------------------------- -# =============================================================================================== # - - -# =============================================================================================== # -# ------------------------------- Flux calculation ---------------------------------------------- -# =============================================================================================== # - -# 9. Create input for goFlux package ---- -source("functions/create_input_for_goflux_package.R") -# output: goflux_input_n2o_df + csv file -# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time)) - -# 10. Calculate fluxes with goFlux package ---- -if (calc_fluxes_with_goflux_package == "T"){ - print("") - print(">> prepare DF for and calculate fluxes with the goflux package") - - # calc fluxes - # simply source the file and run code (not a function yet) - # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir' - source("functions/calculate_fluxes_with_goflux_package.R") -} -# to do's: add date + measCamp to output pdf (line 35) and .csv file (same as above 8.) -# output should go to /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/202305/ -# ofc, there are folders 202307 and 202309, too. -# starts from line 33 onwards. 
Is currently stored in the wrong place, not sure how to change it - - -# =============================================================================================== # -# ------------------------------- Data analysis ---------------------------------------------- -# =============================================================================================== # - -# 11. Combine all data and meta data---- -# output: one dataframe that can be used for data analysis and visualisation -# source("/functions/combine_all_data_and_meta_data.R") -# to do: probably adjust code so it can run automatically -# change output: add measCamp and date (line 124-134) -# output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/ - -# 12. Combine all measCamps ---- - -# 13. Visualise data ---- -# does not need to run automatically -#source("/functions/combine_all_meas_campaigns.R") - - - - - - - - - - - - - - -# important!!! all is in UTC but needs to be in Sweden time in the end +# return metaData and data DF +return(list(measPeriodMetaDataDF, selValidMPoutputDF)) -# Bsp von how to modify data function <- do not write in main script -# but as separate function -# source("modify_data.r") -# licor_data_mod <- modify_licor_data(licor_data) -# -# -# ## "modify_data.r" -# modify_licor_data <- function(licor_data){ -# -# licor_data_mod <- licor_data +1 -# -# return(licor_data_mod) -# -# } +} # end of f_main_data_preprocessing() diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R new file mode 100644 index 0000000..04da2f0 --- /dev/null +++ b/preprocessing/main_flux_analysis.R @@ -0,0 +1,27 @@ +# +# Main script for analysis of fluxes calculated with 'main_flux_calculation.R' +# +# + + +# =============================================================================================== # +# ------------------------------- Flux analysis ------------------------------------------------- +# 
=============================================================================================== # + +# 1. Combine all data and meta data---- +# output: one dataframe that can be used for data analysis and visualisation +# source("/functions/combine_all_data_and_meta_data.R") +# to do: probably adjust code so it can run automatically +# change output: add measCamp and date (line 124-134) +# output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/ + +# 2. Combine all measCamps ---- + +# 3. Visualise data ---- +# does not need to run automatically +#source("/functions/combine_all_meas_campaigns.R") + + +# TODO ? +# important!!! all is in UTC but needs to be in Sweden time in the end + diff --git a/preprocessing/main_flux_calculation.R b/preprocessing/main_flux_calculation.R new file mode 100644 index 0000000..6562668 --- /dev/null +++ b/preprocessing/main_flux_calculation.R @@ -0,0 +1,32 @@ +# +# Main script for calculating the gas fluxes from the data prepared with 'main_preprocessing.R' +# +# + + +# =============================================================================================== # +# ------------------------------- Flux calculation ---------------------------------------------- +# =============================================================================================== # + +# 1. Create input for goFlux package ---- +source("functions/create_input_for_goflux_package.R") +# output: goflux_input_n2o_df + csv file +# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time)) + +# 2. 
Calculate fluxes with goFlux package ---- +if (calc_fluxes_with_goflux_package == "T"){ + print("") + print(">> prepare DF for and calculate fluxes with the goflux package") + + # calc fluxes + # simply source the file and run code (not a function yet) + # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir' + source("functions/calculate_fluxes_with_goflux_package.R") +} + +# TODO: +# * add date + measCamp to output pdf (line 35) and .csv file (same as above 8.) +# * output should go to /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/202305/ +# * ofc, there are folders 202307 and 202309, too. +# * starts from line 33 onwards. Is currently stored in the wrong place, not sure how to change it + diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R new file mode 100644 index 0000000..3d3901e --- /dev/null +++ b/preprocessing/run_all_main.R @@ -0,0 +1,148 @@ +# +# run pre-processing of data and flux calculation for selected measurement campaings +# +# +# status: works with N2O and 2023 measCamp +# + + +# =============================================================================================== # +# script settings ---- +# =============================================================================================== # + +# list of measurement campaigns to run [YYYYMM] +measCampList <- c("202305", "202307", "202309") + +# gas type to process [T,F] +process_co2 <- "F" +process_ch4 <- "F" +process_n2o <- "T" +# how to calculate gas fluxes +calc_fluxes_with_gasfluxes_package <- "F" +calc_fluxes_with_goflux_package <- "T" + +# process aeris raw data (depending on measurement campaign) [T,F] +# FALSE by default +# might be set to TRUE for individual measurement campaigns (see below) TODO this DF is not used anyway! 
+process_aeris_raw_data <- "F" + +# plot figures [T,F] +create_plots_ghg_conc <- "T" + +# use the PGA data to simulate GC data, and run only specific filter/correction functions +simulateGCdata <- "F" # [F,T] + + +# =============================================================================================== # +# init & get main variables & settings, set directories, change into main dir, load R libraries ---- +# =============================================================================================== # + +# set dirs +# main working directory (version control with gitlab) +# data dir (files with field data) +# differentiate between users +username <- Sys.getenv("USER") +if (username == "ntriches"){ + # Nathalie + mainDir <- "/home/ntriches/git_repo/data-analysis/preprocessing" + dataDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data" + figsGasConcDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing" + outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" +} else if (username == "jengel"){ + # Jan + mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" + dataDir <- "/home/jengel/nathalie/20231210-field-data" + figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" + outDir <- "/home/jengel/nathalie/20231210-script-output" +} else { + # error catching + stop("ERROR no valid username") +} + +# get date & time (e.g.: '20231211_195800') +currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S") + +# change into main dir +setwd(mainDir) + +# get additional data needed for flux calculation +source("additional_info/gas_id.txt") +source("additional_info/fixed_values.txt") + +# load libraries +library(dplyr) +library(tidyverse) +# optional install (if not already installed) and load dplyr package +#if(!require(dplyr)){install.packages('dplyr')} +#if(!require(tidyverse)){install.packages('tidyverse')} + +if (calc_fluxes_with_gasfluxes_package == "T"){ + library(gasfluxes) +} +if 
(calc_fluxes_with_goflux_package == "T"){ + # TODO move from functions/calculate_fluxes_with_gofluxyourself_package.R to here +} + +# basic R settings +options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits + + +# =============================================================================================== # +# ------------------------------- Data pre-processing ------------------------------------------- +# =============================================================================================== # + +source("main_data_preprocessing.R") + +#####testing +measCampList <- c("202307") + +# create new empty DF for data and metaData +ppN2OdataDF <- data.frame() +measPeriodMetaDataDF <- data.frame() + +# loop over measCampList +for (measCamp in measCampList){ + + # read raw data from Aeris output file + if (measCamp == "202307") process_aeris_raw_data <- "T" + + # extract expYear ('substr(x, start, stop)') + expYear <- substring(measCamp, 1, 4) + + # run function + list_hlp <- f_main_data_preprocessing(username, + process_co2, + process_ch4, + process_n2o, + create_plots_ghg_conc, + simulateGCdata, + mainDir, + dataDir, + figsGasConcDir, + outDir, + currentDateTime, + measCamp, + expYear) + + # extract the two DF from list + measPeriodMetaDataDF <- rbind(measPeriodMetaDataDF, as.data.frame(list_hlp[[1]])) + ppN2OdataDF <- rbind(ppN2OdataDF, as.data.frame(list_hlp[[2]])) +} + + + +# =============================================================================================== # +# ------------------------------- Flux calculation ---------------------------------------------- +# =============================================================================================== # + + + + +# =============================================================================================== # +# ------------------------------- Flux analysis ------------------------------------------------- +# 
=============================================================================================== # + + + + + diff --git a/preprocessing/run_main_script.sh b/preprocessing/run_main_script.sh deleted file mode 100755 index 0860f43..0000000 --- a/preprocessing/run_main_script.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/bash - -## -## run the preprocessing main script -## 'main_script.R' -## - -# experiment years & measurement campaigns -## 2022 -## 09 -## -## 2023 -## 05 07 09 -## -## 2024 -## tbd -## - -# arguments: -# 1) expYear (e.g.: 2023) -# 2) expMonth (e.g.: 09) - - -# execute the preprocessing for the measurement campaigns the script is working for -Rscript --vanilla ./main_script.R 2023 05 -Rscript --vanilla ./main_script.R 2023 07 -Rscript --vanilla ./main_script.R 2023 09 -- GitLab From 52a49cb32737d5b81bf288a2e20f99620d889fbb Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Mon, 19 Feb 2024 16:41:59 +0100 Subject: [PATCH 02/24] modify measID to include also measCamp, and run pre-processing for all 2023 measCamp * works for normal & simGC mode * gives a warning about creating some NA during the process - unsure where this happens --- .../create_meas_period_metadata_df.R | 22 ++++++++++++++++--- preprocessing/main_data_preprocessing.R | 5 +++-- preprocessing/run_all_main.R | 11 ++++++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/preprocessing/functions/create_meas_period_metadata_df.R b/preprocessing/functions/create_meas_period_metadata_df.R index 5cce89c..d10bdaa 100644 --- a/preprocessing/functions/create_meas_period_metadata_df.R +++ b/preprocessing/functions/create_meas_period_metadata_df.R @@ -4,9 +4,9 @@ ## new data may be added with further steps of preprocessing the data ## -create_measPeriodMetaDataDF <- function(expYear, field_mp_meta_data_DF){ +create_measPeriodMetaDataDF <- function(expYear, measCamp, field_mp_meta_data_DF){ - print("start create meta data DF") + print(">> start create meta 
data DF") # number of measurement periods numRows <- nrow(field_mp_meta_data_DF) @@ -26,6 +26,22 @@ create_measPeriodMetaDataDF <- function(expYear, field_mp_meta_data_DF){ "micro_habitat" = field_mp_meta_data_DF$micro_habitat, # field-recorded micro habitat "chamber_volume_m3" = rep(NA, numRows)) # depends on collar heights, can be different for every measurement period + ## add the measCamp as integer to the measID + if (measCamp == "202305"){ + measCampInt <- 2023050000 + } else if(measCamp == "202307"){ + measCampInt <- 2023070000 + } else if(measCamp == "202309"){ + measCampInt <- 2023090000 + } else { + # error, unknown measCamp + print("") + stop("ERROR - unknown measCamp") + print("") + } + mpMetaDataDF$meas_ID <- measCampInt + mpMetaDataDF$meas_ID + + ## 2022 #### @@ -45,7 +61,7 @@ mpMetaDataDF["collar_height_av_cm"] <- collar_height_average # (if ... else) # fill into - print("end create meta data DF") + print(">> end create meta data DF") return(mpMetaDataDF) } diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R index da54b80..77b51d8 100644 --- a/preprocessing/main_data_preprocessing.R +++ b/preprocessing/main_data_preprocessing.R @@ -16,6 +16,7 @@ f_main_data_preprocessing <- function(username, process_co2, process_ch4, process_n2o, + process_aeris_raw_data, create_plots_ghg_conc, simulateGCdata, mainDir, @@ -27,7 +28,7 @@ f_main_data_preprocessing <- function(username, expYear){ # print info -print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp, expYear)) +print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp)) print("") # get the gas IDs @@ -54,7 +55,7 @@ if (expYear == "2022"){ # 2. Create mp meta data DF ---- source("functions/create_meas_period_metadata_df.R") -measPeriodMetaDataDF <- create_measPeriodMetaDataDF(expYear, field_mp_meta_data_DF) +measPeriodMetaDataDF <- create_measPeriodMetaDataDF(expYear, measCamp, field_mp_meta_data_DF) # 3. 
Sort measurement data per measurement period and gas ---- diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 3d3901e..fbcc48d 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -93,9 +93,6 @@ options("scipen"=100, "digits"=7) # force R not to use exponential notation for source("main_data_preprocessing.R") -#####testing -measCampList <- c("202307") - # create new empty DF for data and metaData ppN2OdataDF <- data.frame() measPeriodMetaDataDF <- data.frame() @@ -104,7 +101,12 @@ measPeriodMetaDataDF <- data.frame() for (measCamp in measCampList){ # read raw data from Aeris output file - if (measCamp == "202307") process_aeris_raw_data <- "T" + if (measCamp == "202307"){ + process_aeris_raw_data <- "T" + # set back to FALSE for any other measCamp + } else{ + process_aeris_raw_data <- "F" + } # extract expYear ('substr(x, start, stop)') expYear <- substring(measCamp, 1, 4) @@ -114,6 +116,7 @@ for (measCamp in measCampList){ process_co2, process_ch4, process_n2o, + process_aeris_raw_data, create_plots_ghg_conc, simulateGCdata, mainDir, -- GitLab From 87f9bc2ae12eb7ce7593879534e7eae4dcbb45c7 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 20 Feb 2024 13:04:32 +0100 Subject: [PATCH 03/24] modify flux calculation & analysis * should now run automatically with the DF provided by pre-processing * remove the file 'combine_all_meas_campaigns.R' and moved its code into 'combine_all_data_and_meta_data.R' * not yet tested --- .../calculate_fluxes_with_goflux_package.R | 38 ++---- .../combine_all_data_and_meta_data.R | 118 +++++++++++------- .../functions/combine_all_meas_campaigns.R | 78 ------------ .../create_input_for_goflux_package.R | 12 +- .../functions/visualise_fluxes_from_goflux.R | 63 +++++----- preprocessing/main_data_preprocessing.R | 1 + preprocessing/main_flux_analysis.R | 15 ++- preprocessing/main_flux_calculation.R | 21 +++- preprocessing/run_all_main.R | 26 +++- 9 files 
changed, 177 insertions(+), 195 deletions(-) delete mode 100644 preprocessing/functions/combine_all_meas_campaigns.R diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index d49ba36..d139bfb 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -1,54 +1,40 @@ # calc fluxes with GoFlux package - - -# install package remotes -if (!require("remotes", quietly = TRUE)) - install.packages("remotes") - -# install package GoFlux every time to ensure using the most recent version -remotes::install_github("Qepanna/goFlux") - -# load library -library(goFlux) -library(tidyverse) - +# +# +# input DF: goflux_input_n2o_df +# +# output DF: goflux_best_n2o_flux_df +# ## use DF provided by "functions/create_input_for_goflux_package.R" ## either read input file or use DF directly -# read input file -#goflux_input_n2o_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/measCamp_202305goflux_input_n2o_df_20240131.csv", -# sep = ";", na = "NA") - # remove all NAs goflux_input_n2o_df <- goflux_input_n2o_df %>% drop_na() # remove all rows with a missing value in any column + # calculate fluxes n2o_flux_result_df <- goFlux(goflux_input_n2o_df, "N2Odry_ppb", prec = 0.2) # choose best flux -best_n2o_flux_df <- best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25) +goflux_best_n2o_flux_df <- best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25) ?best.flux - # plot results of best fluxes -plot_list <- flux.plot(flux.results = best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb", +plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb", shoulder = 20, plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"), best.model = TRUE) - -# save output file as pdf - figures +# save output file as 
pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir') setwd(outDir) flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2) +setwd(mainDir) # save output files - flux calculations # write.table(n2o_flux_result, # paste0(outDir, "/measCamp_", measCamp, "goflux_goflux_output.csv"), # row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") -write.table(best_n2o_flux_df, +write.table(goflux_best_n2o_flux_df, paste0(outDir, "/measCamp_", measCamp, "goflux_bestflux_output.csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") -setwd(mainDir) - - diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 32668df..342bf5e 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -1,17 +1,10 @@ -# aim: combine all data and meta data - -## input files with flux calculation results provided by "functions/calculate_fluxes_with_goflux_package.R" -# spring 2023 -goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202305/measCamp_202305goflux_bestflux_output.csv" -# summer 2023 -goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202307/measCamp_202307goflux_bestflux_output.csv" -# autumn 2023 -goflux_results_file <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/202309/measCamp_202309goflux_bestflux_output.csv" - -## use either input file: -goflux_results_df <- read.table(goflux_results_file, sep = ";", header = TRUE, stringsAsFactors = FALSE) -## or use DF (from "functions/calculate_fluxes_with_goflux_package.R") -goflux_results_df <- best_n2o_flux_df +# combine all data and meta data +# +# +# input DF: goflux_best_n2o_flux_df +# +# output DF: goflux_data_metadata_df +# # create df with all needed 
variables: @@ -21,9 +14,9 @@ goflux_results_df <- best_n2o_flux_df # flux estimates for N2O, CH4, CO2 # add start_time to plot the whole year -measurement_periods_total <- nrow(goflux_results_df) +measurement_periods_total <- nrow(goflux_best_n2o_flux_df) print(paste0("number of mp: ", measurement_periods_total)) -str(goflux_results_df) +str(goflux_best_n2o_flux_df) # run this for all measurement campaigns goflux_data_metadata_df <- data.frame( datetime = as.POSIXct(rep("1000-01-01 00:00:00", measurement_periods_total), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ), @@ -48,16 +41,16 @@ goflux_data_metadata_df <- data.frame( ) # fill in values from measPeriodMetaDataDF (same amount of rows) and -# goflux_results_df with best flux = N2O nmol +# goflux_best_n2o_flux_df with best flux = N2O nmol # remove invalid mps measPeriodMetaDataDF <- measPeriodMetaDataDF %>% filter(is_mp_valid == 1) # loop over all mp provided by goflux package output individually -for (gf_meas_ID in goflux_results_df$UniqueID) { +for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { #print(gf_meas_ID) # get the rows corresponding with the gf_meas_ID - line_Num_gf_results_df <- which(goflux_results_df$UniqueID == gf_meas_ID) + line_Num_gf_results_df <- which(goflux_best_n2o_flux_df$UniqueID == gf_meas_ID) line_Num_metadata_df <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID) # choose only n2o mps measured with Aeris gas analyser @@ -69,16 +62,16 @@ for (gf_meas_ID in goflux_results_df$UniqueID) { goflux_data_metadata_df$plot_no[line_Num_gf_results_df] <- measPeriodMetaDataDF$plot_no[line_Num_metadata_df] goflux_data_metadata_df$micro_habitat[line_Num_gf_results_df] <- measPeriodMetaDataDF$micro_habitat[line_Num_metadata_df] goflux_data_metadata_df$light_dark[line_Num_gf_results_df] <- measPeriodMetaDataDF$light_dark[line_Num_metadata_df] - goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df] <- goflux_results_df$best.flux[line_Num_metadata_df] - 
goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df] <- goflux_results_df$LM.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df] <- goflux_results_df$HM.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df] <- goflux_results_df$best.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df] <- goflux_results_df$model[line_Num_metadata_df] + goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df] + goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$LM.flux[line_Num_metadata_df] + goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$HM.flux[line_Num_metadata_df] + goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df] + goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$model[line_Num_metadata_df] } # fill in values from ppN2OdataDF (differing amount of rows) # loop over all mp provided by gas_fluxes pkg output individually -for (gf_meas_ID in goflux_results_df$UniqueID) { +for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { line_Num_metadata_df <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID) # choose only n2o mps measured with Aeris gas analyser @@ -127,20 +120,61 @@ for (gf_meas_ID in goflux_results_df$UniqueID) { # } # write DF goflux_data_metadata_df -# 202305 May campaign -write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202305.csv", - row.names = FALSE, quote = FALSE, sep = ",") - -# 202307 July 2023 campaign -write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202307.csv", - row.names = FALSE, quote = FALSE, sep = ",") -# 
202309 Sept 2023 campaign -write.table(goflux_data_metadata_df, "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202309.csv", - row.names = FALSE, quote = FALSE, sep = ",") - - - - - - - +write.table(goflux_data_metadata_df, + paste0(fluxResOutDir, "/all/", currentDateTime, "_goflux_n2o_data_metadata.csv"), + row.names = FALSE, + quote = FALSE, + sep = ",") + +# Corrections of the DF #### + +# change datetime to POSIXct +goflux_data_metadata_df$datetime <- as.POSIXct(goflux_data_metadata_df$datetime, format = "%Y-%m-%d %H:%M:%S", tz="UTC") +# remove white space from micro habitats +goflux_data_metadata_df$micro_habitat <- gsub(" ", "", goflux_data_metadata_df$micro_habitat) +# remove white space from light_dark +goflux_data_metadata_df$light_dark <- gsub(" ", "", goflux_data_metadata_df$light_dark) +# change dak to dark +goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="dak")] <- "dark" +# change palsa-moss to palsa_moss +goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss" + +# remove first mp / row +# goflux_data_metadata_df <- goflux_data_metadata_df[-1, ] + +# remove values below -1000 +goflux_data_metadata_df <- goflux_data_metadata_df %>% + filter(n2o_nmolm2sec1 > -500) + +NAs <- complete.cases(goflux_data_metadata_df) +goflux_data_metadata_df <- goflux_data_metadata_df %>% + filter(!NAs) + + +# create new DF - one per measCampaign +# needs 'stringr' library +# selection based on 'datetime' column containing "year-month" +# and replace 'measCamp' column values with a string of the name of the month + +measCampSpring <- goflux_data_metadata_df %>% + filter(str_detect(datetime, "2023-05")) %>% + mutate(measCamp="May") + +measCampSummer <- goflux_data_metadata_df %>% + filter(str_detect(datetime, "2023-07")) %>% + mutate(measCamp="July") + +measCampAutumn <- goflux_data_metadata_df %>% + filter(str_detect(datetime, 
"2023-09")) %>% + mutate(measCamp="September") + +# combine data frames from all measurement campaigns to one data set +goflux_data_metadata_df <- measCampSpring +goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn) + +# TODO write out the DF as file ? + +print("") +print(">>> finished <<<") +print(" the DF goflux_data_metadata_df is ow available in your global environment") +print("") \ No newline at end of file diff --git a/preprocessing/functions/combine_all_meas_campaigns.R b/preprocessing/functions/combine_all_meas_campaigns.R deleted file mode 100644 index ada822d..0000000 --- a/preprocessing/functions/combine_all_meas_campaigns.R +++ /dev/null @@ -1,78 +0,0 @@ -# Read in files #### -# output files from create_dataframe_for_analysis_from_goflux function - -# load libraries -library(dplyr) -library(stringr) - - -## input files provided by "functions/combine_all_data_and_meta_data.R" -# spring 2023 -goflux_metadata_file_2305 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202305.csv" -# summer 2023 -goflux_metadata_file_2307 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202307.csv" -# autumn 2023 -goflux_metadata_file_2309 <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all/20240131_goflux_n2o_data_metadata_202309.csv" - -## read input files - one per measCamp -goflux_metadata_df_2305 <- read.table(goflux_metadata_file_2305, header = TRUE, sep = ",", stringsAsFactors = FALSE) -# are there NAs in the n2o fluxes within the data frame? 
-#goflux_metadata_df_2305[is.na(goflux_metadata_df_2305$n2o_nmolm2sec1), ] # yes -# drop NA rows -#goflux_metadata_df_2305_noNA <- goflux_metadata_df_2305[-c(628:667), ] -#goflux_metadata_df_2305_noNA[is.na(goflux_metadata_df_2305_noNA$n2o_nmolm2sec1), ] # no -goflux_metadata_df_2307 <- read.table(goflux_metadata_file_2307, header = TRUE, sep = ",", stringsAsFactors = FALSE) -goflux_metadata_df_2309 <- read.table(goflux_metadata_file_2309, header = TRUE, sep = ",", stringsAsFactors = FALSE) - -# combine data frames from all measurement campaigns to one data set -flux_data_meta_data_all_measCamp <- goflux_metadata_df_2305 -flux_data_meta_data_all_measCamp <- rbind(flux_data_meta_data_all_measCamp, goflux_metadata_df_2307, goflux_metadata_df_2309) -str(flux_data_meta_data_all_measCamp) - -# Corrections of the DF #### - -# change datetime to POSIXct -flux_data_meta_data_all_measCamp$datetime <- as.POSIXct(flux_data_meta_data_all_measCamp$datetime, format = "%Y-%m-%d %H:%M:%S", tz="UTC") -# remove white space from micro habitats -flux_data_meta_data_all_measCamp$micro_habitat <- gsub(" ", "", flux_data_meta_data_all_measCamp$micro_habitat) -# remove white space from light_dark -flux_data_meta_data_all_measCamp$light_dark <- gsub(" ", "", flux_data_meta_data_all_measCamp$light_dark) -# change dak to dark -flux_data_meta_data_all_measCamp$light_dark[which(flux_data_meta_data_all_measCamp$light_dark=="dak")] <- "dark" -# change palsa-moss to palsa_moss -flux_data_meta_data_all_measCamp$micro_habitat[which(flux_data_meta_data_all_measCamp$micro_habitat=="palsa-moss")] <- "palsa_moss" - -# remove first mp / row -# flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp[-1, ] - -# remove values below -1000 -#library(dplyr) -flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp %>% - filter(n2o_nmolm2sec1 > -500) - -NAs <- complete.cases(flux_data_meta_data_all_measCamp) -flux_data_meta_data_all_measCamp <- flux_data_meta_data_all_measCamp %>% - 
filter(!NAs) - - -# create new DF - one per measCampaign -# selection based on 'datetime' column containing "year-month" -# and replace 'measCamp' column values with a string of the name of the month - -#library(stringr) -measCampSpring <- flux_data_meta_data_all_measCamp %>% - filter(str_detect(datetime, "2023-05")) %>% - mutate(measCamp="May") - -measCampSummer <- flux_data_meta_data_all_measCamp %>% - filter(str_detect(datetime, "2023-07")) %>% - mutate(measCamp="July") - -measCampAutumn <- flux_data_meta_data_all_measCamp %>% - filter(str_detect(datetime, "2023-09")) %>% - mutate(measCamp="September") - -# combine data frames from all measurement campaigns to one data set -flux_data_meta_data_all_measCamp <- measCampSpring -flux_data_meta_data_all_measCamp <- rbind(flux_data_meta_data_all_measCamp, measCampSummer, measCampAutumn) - diff --git a/preprocessing/functions/create_input_for_goflux_package.R b/preprocessing/functions/create_input_for_goflux_package.R index c5bf4a3..85e9746 100644 --- a/preprocessing/functions/create_input_for_goflux_package.R +++ b/preprocessing/functions/create_input_for_goflux_package.R @@ -1,10 +1,15 @@ # Preparation of input files for gasfluxes package (same structure as HMR package) # main script until "sort data" needs to be run before this script runs -# (not embedded in main script yet) +# # -# output DF: gasfluxes_input_n2o_df +# input DF: ppN2OdataDF & measPeriodMetaDataDF # +# output DF: goflux_input_n2o_df +# + +# source file with fixed values needed for flux calculation +source("additional_info/fixed_values.txt") # N2O # first convert ppm to ppb and add as column to df @@ -26,9 +31,6 @@ empty_cols <- data.frame("plot_no" = rep(NA, numRows), # add 'empty_cols' DF to mPN2oPlotHabitat DF mPN2oPlotHabitat <- cbind(mPN2oPlotHabitat, empty_cols) -# source file with fixed values needed for flux calculation -source("additional_info/fixed_values.txt") - # loop over all mps for (iLine in 1:nrow(measPeriodMetaDataDF)){ # choose only 
n2o mps measured with Aeris gas analyser diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R index 2d7293a..b319355 100644 --- a/preprocessing/functions/visualise_fluxes_from_goflux.R +++ b/preprocessing/functions/visualise_fluxes_from_goflux.R @@ -1,46 +1,51 @@ +# +# manual plotting of flux results from goFlux +# +# input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R' +# # R base plots --------------------------------------------------------------- # select rows with onlylight / dark -rows_light <- which(flux_data_meta_data_all_measCamp$light_dark == "light") -rows_dark <- which(flux_data_meta_data_all_measCamp$light_dark == "dark") +rows_light <- which(goflux_data_metadata_df$light_dark == "light") +rows_dark <- which(goflux_data_metadata_df$light_dark == "dark") ### manually select -row_select <- 1:nrow(flux_data_meta_data_all_measCamp) # ALL rows. i.e, undo other selection +row_select <- 1:nrow(goflux_data_metadata_df) # ALL rows. 
i.e, undo other selection row_select <- rows_light row_select <- rows_dark -plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select] +plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select] ) #,ylim = c(-0.001,0.0030), col = "green3") abline(h=0, col = "green2") -fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select]) +fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select]) abline(fit, col="blue2") # ligth and dark row_select <- rows_light -plot(flux_data_meta_data_all_measCamp$n2o_HMFlux[row_select] ~ - flux_data_meta_data_all_measCamp$datetime[row_select], +plot(goflux_data_metadata_df$n2o_HMFlux[row_select] ~ + goflux_data_metadata_df$datetime[row_select], #) ylim = c(-50, 50), col = "orange", cex = 1, xlab = "months in 2023", ylab = 'N'['2']*'O' * ' flux ('* 'mg' ~N[2]*O-N~ m^-2~h^-1*')') row_select <- rows_dark -points(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$datetime[row_select], +points(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$datetime[row_select], col = "black", pch = 18, cex = 1, ) abline(h=0, col = "black") ## soil moist -plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_moisture_12cm[row_select], +plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_moisture_12cm[row_select], ylim = c(-0.001, 0.0015)) -fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_moisture_12cm[row_select]) +fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_moisture_12cm[row_select]) abline(fit, col="blue2") ## soil temp 
-plot(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_temperature_15cm_s1[row_select], +plot(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_temperature_15cm_s1[row_select], ylim = c(-0.001, 0.0015)) -fit <- lm(flux_data_meta_data_all_measCamp$n2o_nmolm2sec1[row_select] ~ flux_data_meta_data_all_measCamp$soil_temperature_15cm_s1[row_select]) +fit <- lm(goflux_data_metadata_df$n2o_nmolm2sec1[row_select] ~ goflux_data_metadata_df$soil_temperature_15cm_s1[row_select]) abline(fit, col="blue2") abline(h=0, col = "green2") @@ -97,7 +102,7 @@ symlog_trans <- function(base = 10, thr = 1, scale = 1){ # N2O fluxes over seasons #### -n2o_fluxes_seasons <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_seasons <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = datetime, y = n2o_nmolm2sec1, )) + @@ -115,7 +120,7 @@ n2o_fluxes_seasons <- flux_data_meta_data_all_measCamp %>% n2o_fluxes_seasons # L + D -8 #### -n2o_fluxes_seasons2 <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_seasons2 <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) + @@ -134,7 +139,7 @@ n2o_fluxes_seasons2 # palsa_lichen #### -n2o_season_palsalichen <- flux_data_meta_data_all_measCamp %>% +n2o_season_palsalichen <- goflux_data_metadata_df %>% filter(micro_habitat == "palsa_lichen") %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) + @@ -153,7 +158,7 @@ n2o_season_palsalichen <- flux_data_meta_data_all_measCamp %>% n2o_season_palsalichen # palsa moss 8#### -n2o_season_pm8 <- flux_data_meta_data_all_measCamp %>% +n2o_season_pm8 <- 
goflux_data_metadata_df %>% filter(micro_habitat == "palsa_moss") %>% filter(plot_no == 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% @@ -173,7 +178,7 @@ n2o_season_pm8 <- flux_data_meta_data_all_measCamp %>% n2o_season_pm8 # palsa_moss -8 #### -n2o_season_palsamoss <- flux_data_meta_data_all_measCamp %>% +n2o_season_palsamoss <- goflux_data_metadata_df %>% filter(micro_habitat == "palsa_moss") %>% filter(plot_no != 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% @@ -193,7 +198,7 @@ n2o_season_palsamoss <- flux_data_meta_data_all_measCamp %>% n2o_season_palsamoss # N2O fluxes over season: bog plots -n2o_season_bog <- flux_data_meta_data_all_measCamp %>% +n2o_season_bog <- goflux_data_metadata_df %>% filter(micro_habitat == "bog") %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) + @@ -212,7 +217,7 @@ n2o_season_bog <- flux_data_meta_data_all_measCamp %>% n2o_season_bog # N2O fluxes over season: fen plots -n2o_season_fen <- flux_data_meta_data_all_measCamp %>% +n2o_season_fen <- goflux_data_metadata_df %>% filter(micro_habitat == "fen") %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) + @@ -232,7 +237,7 @@ n2o_season_fen # Boxplots #### # L + D #### -dark_light_season <- flux_data_meta_data_all_measCamp %>% +dark_light_season <- goflux_data_metadata_df %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% ggplot(aes(x = micro_habitat, y = n2o_nmolm2sec1, colour = light_dark, shape = light_dark)) + scale_colour_manual(values = c("black", "orange")) + @@ -244,7 +249,7 @@ dark_light_season <- flux_data_meta_data_all_measCamp %>% dark_light_season # L + D -8 #### -dark_light_season <- flux_data_meta_data_all_measCamp %>% 
+dark_light_season <- goflux_data_metadata_df %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>% filter(plot_no != 8) %>% @@ -258,7 +263,7 @@ ggplot(aes(x = micro_habitat, y = n2o_nmolm2sec1, colour = light_dark, shape = l dark_light_season # Spatial variability overall #### -variability_transects <- flux_data_meta_data_all_measCamp %>% +variability_transects <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% #filter(light_dark == "dark") %>% ggplot(aes(plot_no, n2o_nmolm2sec1, group = plot_no)) + @@ -273,7 +278,7 @@ variability_transects <- flux_data_meta_data_all_measCamp %>% variability_transects # Spatial variability per measCamp#### -variability_measCamp <- flux_data_meta_data_all_measCamp %>% +variability_measCamp <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + geom_boxplot() + @@ -289,7 +294,7 @@ variability_measCamp <- flux_data_meta_data_all_measCamp %>% variability_measCamp # L per micro habitat #### -light <- flux_data_meta_data_all_measCamp %>% +light <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% filter(light_dark == "light") %>% ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + @@ -304,7 +309,7 @@ light <- flux_data_meta_data_all_measCamp %>% light # D per micro habitat #### -dark <- flux_data_meta_data_all_measCamp %>% +dark <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% filter(light_dark == "dark") %>% ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + @@ -320,7 +325,7 @@ dark # Soil temperature #### # Soil T4 overall #### -n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat)) + geom_point(size = 2) + @@ -335,7 +340,7 @@ n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>% 
n2o_fluxes_soiltemp4 # Soil T4 per micro habitat#### -n2o_fluxes_soiltemp4 <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% ggplot( aes(x = soil_temperature_15cm_s1, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) + geom_point(size = 2) + @@ -353,7 +358,7 @@ n2o_fluxes_soiltemp4 # Soil moisture #### # Soil M 12cm #### -n2o_fluxes_soilmoist_12cm <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_soilmoist_12cm <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat)) + geom_point(size = 2) + @@ -369,7 +374,7 @@ n2o_fluxes_soilmoist_12cm <- flux_data_meta_data_all_measCamp %>% n2o_fluxes_soilmoist_12cm # Soil M 30 cm #### -n2o_fluxes_soilmoist_30cm <- flux_data_meta_data_all_measCamp %>% +n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% filter(soil_moisture_30cm < 2000) %>% # remove some weird error values filter(soil_moisture_30cm > 0) %>% # remove 0s diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R index 77b51d8..0ec132a 100644 --- a/preprocessing/main_data_preprocessing.R +++ b/preprocessing/main_data_preprocessing.R @@ -28,6 +28,7 @@ f_main_data_preprocessing <- function(username, expYear){ # print info +print("") print(paste0(">>>> start data pre-processing for measurement campaign: ", measCamp)) print("") diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R index 04da2f0..52a41e7 100644 --- a/preprocessing/main_flux_analysis.R +++ b/preprocessing/main_flux_analysis.R @@ -8,18 +8,21 @@ # ------------------------------- Flux analysis ------------------------------------------------- # =============================================================================================== # -# 1. 
Combine all data and meta data---- +# print info +print("") +print(">>>> start flux analysis") +print("") + +# 1. Combine all data and meta data ---- # output: one dataframe that can be used for data analysis and visualisation -# source("/functions/combine_all_data_and_meta_data.R") +source("/functions/combine_all_data_and_meta_data.R") # to do: probably adjust code so it can run automatically # change output: add measCamp and date (line 124-134) # output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/ -# 2. Combine all measCamps ---- - -# 3. Visualise data ---- +# 2. Visualise data ---- # does not need to run automatically -#source("/functions/combine_all_meas_campaigns.R") +# .... preprocessing/functions/visualise_fluxes_from_goflux.R # TODO ? diff --git a/preprocessing/main_flux_calculation.R b/preprocessing/main_flux_calculation.R index 6562668..9b443ad 100644 --- a/preprocessing/main_flux_calculation.R +++ b/preprocessing/main_flux_calculation.R @@ -8,19 +8,30 @@ # ------------------------------- Flux calculation ---------------------------------------------- # =============================================================================================== # +# print info +print("") +print(">>>> start flux calculation") +print("") + # 1. Create input for goFlux package ---- source("functions/create_input_for_goflux_package.R") -# output: goflux_input_n2o_df + csv file -# (to do ONLY IF IT DOESN'T RUN AUTOMATICALLY : change output name of csv file: add current date (not time)) # 2. 
Calculate fluxes with goFlux package ---- if (calc_fluxes_with_goflux_package == "T"){ print("") print(">> prepare DF for and calculate fluxes with the goflux package") - + + if (updateGoFluxPkg == "T"){ + # install / update goFlux package (to ensure using the most recent version) + # install package remotes + if (!require("remotes", quietly = TRUE)) install.packages("remotes") + remotes::install_github("Qepanna/goFlux") + } + + # load library + library(goFlux) + # calc fluxes - # simply source the file and run code (not a function yet) - # output best_n2o_flux_df + pdf scatterplots and a .csv file saved to 'outDir' source("functions/calculate_fluxes_with_goflux_package.R") } diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index fbcc48d..90a7400 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -10,6 +10,8 @@ # script settings ---- # =============================================================================================== # +# 1. Script settings ---- + # list of measurement campaigns to run [YYYYMM] measCampList <- c("202305", "202307", "202309") @@ -32,11 +34,16 @@ create_plots_ghg_conc <- "T" # use the PGA data to simulate GC data, and run only specific filter/correction functions simulateGCdata <- "F" # [F,T] +# install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R' +updateGoFluxPkg <- "F" # [F,T] + # =============================================================================================== # # init & get main variables & settings, set directories, change into main dir, load R libraries ---- # =============================================================================================== # +# 2. 
Init main variables ---- + # set dirs # main working directory (version control with gitlab) # data dir (files with field data) @@ -48,12 +55,14 @@ if (username == "ntriches"){ dataDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data" figsGasConcDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing" outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" + fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses" } else if (username == "jengel"){ # Jan mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" dataDir <- "/home/jengel/nathalie/20231210-field-data" figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" outDir <- "/home/jengel/nathalie/20231210-script-output" + fluxResOutDir <- paste0(outDir, "/flux-results-output") } else { # error catching stop("ERROR no valid username") @@ -70,8 +79,10 @@ source("additional_info/gas_id.txt") source("additional_info/fixed_values.txt") # load libraries +# the goFlux package is installed/updated in 'main_flux_calculation.R' if 'calc_fluxes_with_goflux_package == "T"' library(dplyr) library(tidyverse) +library(stringr) # optional install (if not already installed) and load dplyr package #if(!require(dplyr)){install.packages('dplyr')} #if(!require(tidyverse)){install.packages('tidyverse')} @@ -79,9 +90,6 @@ library(tidyverse) if (calc_fluxes_with_gasfluxes_package == "T"){ library(gasfluxes) } -if (calc_fluxes_with_goflux_package == "T"){ - # TODO move from functions/calculate_fluxes_with_gofluxyourself_package.R to here -} # basic R settings options("scipen"=100, "digits"=7) # force R not to use exponential notation for number of up to 7 digits @@ -91,6 +99,8 @@ options("scipen"=100, "digits"=7) # force R not to use exponential notation for # ------------------------------- Data pre-processing ------------------------------------------- # 
=============================================================================================== # +# 3. Data pre-processing ---- + source("main_data_preprocessing.R") # create new empty DF for data and metaData @@ -138,14 +148,22 @@ for (measCamp in measCampList){ # ------------------------------- Flux calculation ---------------------------------------------- # =============================================================================================== # +# 4. Flux calculation ---- +# source and execute the code +source("main_flux_calculation.R") # =============================================================================================== # # ------------------------------- Flux analysis ------------------------------------------------- # =============================================================================================== # +# 5. Flux analysis ---- +# source and execute the code +source("main_flux_analysis.R") - +# +# result DF: goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R' +# \ No newline at end of file -- GitLab From 8cee44db35b2f5efe4ff61fea7d3707de0c94ae5 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 20 Feb 2024 14:19:32 +0100 Subject: [PATCH 04/24] modify path settings for directories and remove correct_n2o_conc_by_meas_period.R * correct_n2o_conc_by_meas_period.R is not used * directories are now created if not existent * only structural changes, no functional changes --- .../additional_info/correction_filter_n2o.txt | 5 ++ .../additional_info/simulate_gc_n2o_data.txt | 7 ++- .../combine_all_data_and_meta_data.R | 3 +- .../correct_filter_n2o_conc_by_meas_period.R | 15 +++--- .../correct_n2o_conc_by_meas_period.R | 48 ------------------- preprocessing/functions/plot_ghg_conc_2023.R | 7 +++ preprocessing/main_data_preprocessing.R | 15 +++--- preprocessing/run_all_main.R | 31 ++++++++---- 8 files changed, 57 insertions(+), 74 deletions(-) delete mode 100644 
preprocessing/functions/correct_n2o_conc_by_meas_period.R diff --git a/preprocessing/additional_info/correction_filter_n2o.txt b/preprocessing/additional_info/correction_filter_n2o.txt index daab75a..23b6a37 100644 --- a/preprocessing/additional_info/correction_filter_n2o.txt +++ b/preprocessing/additional_info/correction_filter_n2o.txt @@ -7,6 +7,11 @@ if (username == "ntriches"){ warningDir <- paste0(outDir, "/filter_correction_warning") } +# create directory if it does not exist +if (!dir.exists(warningDir)){ + dir.create(warningDir) +} + # N2O data parameter for correction/filter functions ## function 01 minLengthMPSec <- 180 # [seconds] diff --git a/preprocessing/additional_info/simulate_gc_n2o_data.txt b/preprocessing/additional_info/simulate_gc_n2o_data.txt index 3d1eb18..80c4dc9 100644 --- a/preprocessing/additional_info/simulate_gc_n2o_data.txt +++ b/preprocessing/additional_info/simulate_gc_n2o_data.txt @@ -5,7 +5,12 @@ if (username == "ntriches"){ simGCsampleDatainfoDir <- paste0(outDir, "/sim_gc_sample_data_info") } -# list of filter/correction functions to run with 'f_correct_n2o_conc_sorted_by_measurementperiods' +# create directory if it does not exist +if (!dir.exists(simGCsampleDatainfoDir)){ + dir.create(simGCsampleDatainfoDir) +} + +# list of filter/correction functions to run with 'f_correct_filter_n2o_conc_by_meas_period' simGCfilterCorrFunctions <- c(1, 2, 3, 4, 5, 8, 9, 10, 11, 12) # min length of the mp to be used for "GC simulation" diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 342bf5e..64003fa 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -120,8 +120,9 @@ for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { # } # write DF goflux_data_metadata_df +# create directory if it does not exist write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/all/", 
currentDateTime, "_goflux_n2o_data_metadata.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata.csv"), row.names = FALSE, quote = FALSE, sep = ",") diff --git a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R index d2ea39e..1d1a2b3 100644 --- a/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R +++ b/preprocessing/functions/correct_filter_n2o_conc_by_meas_period.R @@ -1,12 +1,13 @@ # main filter script for correcting GHG concentrations before flux calculation -f_correct_n2o_conc_sorted_by_measurementperiods <- function(measPeriodMetaDataDF_mID, - measPeriodN2o_mID, - measID, - mainDir, - logfile_n2o_DataCorrection, - username, - simulateGCdata){ +f_correct_filter_n2o_conc_by_meas_period <- function(measPeriodMetaDataDF_mID, + measPeriodN2o_mID, + measID, + mainDir, + outDir, + logfile_n2o_DataCorrection, + username, + simulateGCdata){ print(paste0(">> start N2O data filter/correction for mp: ", measID)) if (simulateGCdata == "T"){ diff --git a/preprocessing/functions/correct_n2o_conc_by_meas_period.R b/preprocessing/functions/correct_n2o_conc_by_meas_period.R deleted file mode 100644 index 7f455b8..0000000 --- a/preprocessing/functions/correct_n2o_conc_by_meas_period.R +++ /dev/null @@ -1,48 +0,0 @@ -# USE: correct measurement periods (mps) to obtain to most stable linear fit -# AIM: reduce noise from closing and opening chamber (hood) in the field - -correct_n2o_conc_sorted_by_mp <- function(measPeriodMetaDataDF_loc, measPeriodN2o_loc){ - - print("start correcting n2o conc measurement periods") - - # create empty (new) data frames for collection of all mps - allN2oMeasPeriodDF <- data.frame() - modMeasPeriodn2o <- data.frame() - - # time difference in the start ("front") of the mp - diffFront <- 20 # 10 rows (measurements) - # time difference in the start ("back") of the mp - diffBack <- 10 # 10 rows (measurements) - - # loop over all mps - 
for (iLine in 1:nrow(measPeriodMetaDataDF)){ - # choose only n2o mps measured with Aeris gas analyser - if(measPeriodMetaDataDF_loc$is_n2o_aeris[iLine] == 1){ - - # choose the correct line for the loop - meas_ID <- measPeriodMetaDataDF_loc$meas_ID[iLine] - # get the right amount of rows from the original df - rows <- which(measPeriodN2o_loc$meas_ID == meas_ID) - # find the first row = find the start of the individual mp - first_row <- min(rows) - # find the last row = find the end of the individual mp - last_row <- max(rows) - # add the new first row time step = add seconds to start time -> take seconds away - new_first_row <- first_row + diffFront - # subtract the new last row time step = subtract from end time -> take seconds away - new_last_row <- last_row - diffBack - # the result is a shorter mp - modMeasPeriodN2o <- measPeriodN2o_loc[new_first_row:new_last_row, ] - - # add mp data to DF with all mp - allN2oMeasPeriodDF <- rbind(allN2oMeasPeriodDF, modMeasPeriodN2o) - - } - } - - print("end correcting n2o-conc measurement periods") - return(allN2oMeasPeriodDF) -} - - - diff --git a/preprocessing/functions/plot_ghg_conc_2023.R b/preprocessing/functions/plot_ghg_conc_2023.R index 73d764c..7c819a9 100644 --- a/preprocessing/functions/plot_ghg_conc_2023.R +++ b/preprocessing/functions/plot_ghg_conc_2023.R @@ -242,8 +242,15 @@ f_plot_ghg_conc_2023_n2o_non_mod_mp <- function(process_n2o, print(">> plot GHG concentrations N2O") if (process_n2o == "T"){ + # # create output dirs if not existent + if (!dir.exists(paste0(figsGasConcDir, "/", expYear))){ + dir.create(paste0(figsGasConcDir, "/", expYear)) + } + + # get vector of measurement IDs meas_ID <- measPeriodN2o$meas_ID + # loop over measurement IDs for (i in unique(meas_ID)){ fileID <- i d <- subset(measPeriodN2o, meas_ID == i) diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R index 0ec132a..1142a75 100644 --- a/preprocessing/main_data_preprocessing.R +++ 
b/preprocessing/main_data_preprocessing.R @@ -160,13 +160,14 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ measID <- measPeriodMetaDataDF$meas_ID[row] rowsN2OData <- which(measPeriodN2o$meas_ID == measID) # returns a list of vector and data.frame - list_hlp <- f_correct_n2o_conc_sorted_by_measurementperiods(measPeriodMetaDataDF[row,], - measPeriodN2o[rowsN2OData,], - measID, - mainDir, - logfile_n2o_DataCorrection, - username, - simulateGCdata) + list_hlp <- f_correct_filter_n2o_conc_by_meas_period(measPeriodMetaDataDF[row,], + measPeriodN2o[rowsN2OData,], + measID, + mainDir, + outDir, + logfile_n2o_DataCorrection, + username, + simulateGCdata) # extract vector and data.frame from list measPeriodMetaDataDF[row,] <- as.data.frame(list_hlp[[1]]) measPeriodN2oCorr <- rbind(measPeriodN2oCorr, as.data.frame(list_hlp[[2]])) diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 90a7400..c75c978 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -45,29 +45,40 @@ updateGoFluxPkg <- "F" # [F,T] # 2. 
Init main variables ---- # set dirs -# main working directory (version control with gitlab) -# data dir (files with field data) +# main working directory - the dir 'preprocessing/' in the git repository with these scripts +# data dir - files with field data +# outDir - main output directory # differentiate between users username <- Sys.getenv("USER") if (username == "ntriches"){ - # Nathalie mainDir <- "/home/ntriches/git_repo/data-analysis/preprocessing" dataDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data" figsGasConcDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing" outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" - fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses" + fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all" } else if (username == "jengel"){ - # Jan - mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" - dataDir <- "/home/jengel/nathalie/20231210-field-data" - figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" - outDir <- "/home/jengel/nathalie/20231210-script-output" - fluxResOutDir <- paste0(outDir, "/flux-results-output") + # ## local + # mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" + # dataDir <- "/home/jengel/nathalie/20231210-field-data" + # figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" + # outDir <- "/home/jengel/nathalie/20231210-script-output" + ## BGC HPC + mainDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing" + dataDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data" + figsGasConcDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc" + outDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output" + ## all + fluxResOutDir <- paste0(outDir, "/flux_results_output") } else { # error 
catching stop("ERROR no valid username") } +# create output dirs if not existent +if (!dir.exists(figsGasConcDir)) dir.create(figsGasConcDir) +if (!dir.exists(outDir)) dir.create(outDir) +if (!dir.exists(fluxResOutDir)) dir.create(fluxResOutDir) + # get date & time (e.g.: '20231211_195800') currentDateTime <- format(Sys.time(), "%Y%m%d_%k%M%S") -- GitLab From 9348836f83a66fec2ae75892c3cff50c29f99942 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 20 Feb 2024 15:23:30 +0100 Subject: [PATCH 05/24] minor changes and corrections --- .../calculate_fluxes_with_goflux_package.R | 2 +- .../combine_all_data_and_meta_data.R | 5 ++++ preprocessing/main_flux_analysis.R | 2 +- preprocessing/run_all_main.R | 28 +++++++++++-------- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index d139bfb..4113d85 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -17,8 +17,8 @@ goflux_input_n2o_df <- goflux_input_n2o_df %>% n2o_flux_result_df <- goFlux(goflux_input_n2o_df, "N2Odry_ppb", prec = 0.2) # choose best flux +# docu: ?best.flux() goflux_best_n2o_flux_df <- best.flux(flux.result = n2o_flux_result_df, g.limit = 1.25) -?best.flux # plot results of best fluxes plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb", diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 64003fa..e5f497b 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -5,6 +5,11 @@ # # output DF: goflux_data_metadata_df # +# colnames(goflux_data_metadata_df) +# [1] "datetime" "meas_ID" "plot_no" "micro_habitat" "par" 
"light_dark" +# [7] "soil_moisture_12cm" "soil_moisture_30cm" "soil_temperature_15cm_s1" "soil_temperature_15cm_s2" "soil_temperature_15cm_s3" "soil_temperature_15cm_s4" +# [13] "ch4_µmolm2sec1" "co2_µmolm2sec1" "n2o_nmolm2sec1" "n2o_LMFlux" "n2o_HMFlux" "n2o_bestFlux" +# [19] "n2o_bestModel" "measCamp" # create df with all needed variables: diff --git a/preprocessing/main_flux_analysis.R b/preprocessing/main_flux_analysis.R index 52a41e7..15e725a 100644 --- a/preprocessing/main_flux_analysis.R +++ b/preprocessing/main_flux_analysis.R @@ -15,7 +15,7 @@ print("") # 1. Combine all data and meta data ---- # output: one dataframe that can be used for data analysis and visualisation -source("/functions/combine_all_data_and_meta_data.R") +source("functions/combine_all_data_and_meta_data.R") # to do: probably adjust code so it can run automatically # change output: add measCamp and date (line 124-134) # output should be stored here: /home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/ diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index c75c978..3bf04ac 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -35,7 +35,7 @@ create_plots_ghg_conc <- "T" simulateGCdata <- "F" # [F,T] # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R' -updateGoFluxPkg <- "F" # [F,T] +updateGoFluxPkg <- "T" # [F,T] # =============================================================================================== # @@ -57,18 +57,24 @@ if (username == "ntriches"){ outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all" } else if (username == "jengel"){ - # ## local - # mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" - # dataDir <- "/home/jengel/nathalie/20231210-field-data" - # figsGasConcDir <- 
"/home/jengel/nathalie/20231210-figs_gas_conc" - # outDir <- "/home/jengel/nathalie/20231210-script-output" - ## BGC HPC - mainDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing" - dataDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data" - figsGasConcDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc" - outDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output" + jePlace <- "bgchpc" # [local,bgchpc] + if (jePlace == "local"){ + mainDir <- "/home/jengel/repo_git/nathi_data-analysis/preprocessing" + dataDir <- "/home/jengel/nathalie/20231210-field-data" + figsGasConcDir <- "/home/jengel/nathalie/20231210-figs_gas_conc" + outDir <- "/home/jengel/nathalie/20231210-script-output" + } else if (jePlace == "bgchpc"){ + mainDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/repo_git/data-analysis/preprocessing" + dataDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/field_data" + figsGasConcDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/figs_gas_conc" + outDir <- "/Net/Groups/BSI/work_scratch/jengel/nathalie/data-analysis_files/script_output" + # using Rstudio via ssh needs: + .libPaths(c("/Net/Groups/BSI/work_scratch/quincy/model/software/r_packages/r_4.3.x", "/Net/Groups/BSI/work_scratch/jengel/software/r_packages/r_4.3.x", "/opt/ohpc/pub/libs/gnu12/R/4.3.2/lib64/R/library")) + } ## all fluxResOutDir <- paste0(outDir, "/flux_results_output") + ## overwrite default setting + updateGoFluxPkg <- "F" } else { # error catching stop("ERROR no valid username") -- GitLab From 424a5e40cda8a651570b5b02fd281d8013f288b5 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Tue, 20 Feb 2024 16:14:00 +0100 Subject: [PATCH 06/24] bugfix: the function flux.plot() takes a max of 5 arguments for the legend --- preprocessing/functions/calculate_fluxes_with_goflux_package.R | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index 4113d85..b6953c6 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -23,7 +23,8 @@ goflux_best_n2o_flux_df <- best.flux(flux.result = n2o_flux_result_df, g.limit # plot results of best fluxes plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb", shoulder = 20, - plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"), + plot.legend = c("RMSE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot + #plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"), best.model = TRUE) # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir') -- GitLab From 59523206fd0722a03d06481dd46c72669124bf24 Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Tue, 20 Feb 2024 21:00:01 +0200 Subject: [PATCH 07/24] change output dir ntriches * one subfolder was missing so output couldn't be saved * currently, the output produces the same error I've been fighting with before * next step: fix that error, then try GC simulation --- preprocessing/run_all_main.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 3bf04ac..6a17334 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -55,7 +55,7 @@ if (username == "ntriches"){ dataDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/data" figsGasConcDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/figures_processing" outDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output" - 
fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/flux_analyses/all" + fluxResOutDir <- "/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all" } else if (username == "jengel"){ jePlace <- "bgchpc" # [local,bgchpc] if (jePlace == "local"){ -- GitLab From 9c2d7b5cb3d40ae4482dd3028c4e7366611c002c Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 12:45:59 +0200 Subject: [PATCH 08/24] unsuccessful trial to find output error * uncommented a few lines for trial, unsuccessful * unsure where the mistake is and where to look for it * output error: first row of measCamp 202307 (or 202309) overwrites the flux estimate from the last row of the previous measCamp 202305 (or 202307) * error is only in data from goflux output, not in data from measCampMetaData. --- .../functions/combine_all_data_and_meta_data.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index e5f497b..20d060e 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -146,12 +146,12 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss" # remove first mp / row -# goflux_data_metadata_df <- goflux_data_metadata_df[-1, ] +goflux_data_metadata_df <- goflux_data_metadata_df[-1, ] # remove values below -1000 goflux_data_metadata_df <- goflux_data_metadata_df %>% filter(n2o_nmolm2sec1 > -500) - +# NAs <- complete.cases(goflux_data_metadata_df) goflux_data_metadata_df <- goflux_data_metadata_df %>% filter(!NAs) @@ -164,15 +164,15 @@ goflux_data_metadata_df <- goflux_data_metadata_df %>% measCampSpring <- goflux_data_metadata_df %>% filter(str_detect(datetime, 
"2023-05")) %>% - mutate(measCamp="May") + mutate(measCamp="May") measCampSummer <- goflux_data_metadata_df %>% filter(str_detect(datetime, "2023-07")) %>% - mutate(measCamp="July") + mutate(measCamp="July") measCampAutumn <- goflux_data_metadata_df %>% filter(str_detect(datetime, "2023-09")) %>% - mutate(measCamp="September") + mutate(measCamp="September") # combine data frames from all measurement campaigns to one data set goflux_data_metadata_df <- measCampSpring -- GitLab From ae1ac598ce3dcdf68ea2cace817cd3f67e2d3cf6 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 14:57:39 +0100 Subject: [PATCH 09/24] create more output files from flux calculations and write all of it to the 'fluxResOutDir' * rename the output files from flux calc, now incl. all measCamp --- .../calculate_fluxes_with_goflux_package.R | 12 ++++++------ .../functions/combine_all_data_and_meta_data.R | 15 ++++++++------- preprocessing/run_all_main.R | 6 ++++++ 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index b6953c6..6321d94 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -28,14 +28,14 @@ plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflu best.model = TRUE) # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir') -setwd(outDir) +setwd(fluxResOutDir) flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2) setwd(mainDir) -# save output files - flux calculations -# write.table(n2o_flux_result, -# paste0(outDir, "/measCamp_", measCamp, "goflux_goflux_output.csv"), -# row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") +# save output file from flux calculations 
+write.table(n2o_flux_result, + paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"), + row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") write.table(goflux_best_n2o_flux_df, - paste0(outDir, "/measCamp_", measCamp, "goflux_bestflux_output.csv"), + paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_bestflux_n2o_output.csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 20d060e..5dbf86d 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -125,9 +125,8 @@ for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { # } # write DF goflux_data_metadata_df -# create directory if it does not exist write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata.csv"), row.names = FALSE, quote = FALSE, sep = ",") @@ -145,13 +144,10 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da # change palsa-moss to palsa_moss goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss" -# remove first mp / row -goflux_data_metadata_df <- goflux_data_metadata_df[-1, ] - # remove values below -1000 goflux_data_metadata_df <- goflux_data_metadata_df %>% filter(n2o_nmolm2sec1 > -500) -# +# remove any line that contains NA NAs <- complete.cases(goflux_data_metadata_df) goflux_data_metadata_df <- goflux_data_metadata_df %>% filter(!NAs) @@ -178,7 +174,12 @@ measCampAutumn <- goflux_data_metadata_df %>% goflux_data_metadata_df <- measCampSpring goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn) -# TODO write out 
the DF as file ? +# write corrected DF goflux_data_metadata_df +write.table(goflux_data_metadata_df, + paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"), + row.names = FALSE, + quote = FALSE, + sep = ",") print("") print(">>> finished <<<") diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 6a17334..ae47d21 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -44,6 +44,12 @@ updateGoFluxPkg <- "T" # [F,T] # 2. Init main variables ---- +# create string with all measCamp +measCampListString <- character() +for (mc in measCampList){ + measCampListString <- paste0(measCampListString, mc, "_") +} + # set dirs # main working directory - the dir 'preprocessing/' in the git repository with these scripts # data dir - files with field data -- GitLab From f57611fdc7be99ffba72a8e3bd33e1d7100c50ac Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 15:05:57 +0100 Subject: [PATCH 10/24] bugfix DF name n2o_flux_result_df in write() function --- preprocessing/functions/calculate_fluxes_with_goflux_package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index 6321d94..0d10072 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -33,7 +33,7 @@ flux2pdf(plot.list = plot_list, outfile = NULL, width = 11.6, height = 8.2) setwd(mainDir) # save output file from flux calculations -write.table(n2o_flux_result, +write.table(n2o_flux_result_df, paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") write.table(goflux_best_n2o_flux_df, -- GitLab From 6827de02ad05094942846237a4f0094cd15c45e8 Mon 
Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 16:10:15 +0100 Subject: [PATCH 11/24] re-write and error-correct data-filling of goflux_data_metadata_df in combine_all_data_and_meta_data.R * not yet tested, but should work --- .../combine_all_data_and_meta_data.R | 184 ++++++++---------- 1 file changed, 77 insertions(+), 107 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 5dbf86d..ea06821 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -1,7 +1,8 @@ -# combine all data and meta data +# +# combine all data and meta data needed for analysis into one DF # # -# input DF: goflux_best_n2o_flux_df +# input DF: goflux_best_n2o_flux_df & measPeriodMetaDataDF # # output DF: goflux_data_metadata_df # @@ -12,117 +13,86 @@ # [19] "n2o_bestModel" "measCamp" -# create df with all needed variables: -# measurement ID, plot number, micro habitat, -# PAR, light or dark measurement (L / D), -# soil moisture (VWC12 and VWC30), soil temperature (1-4) -# flux estimates for N2O, CH4, CO2 -# add start_time to plot the whole year +# number of measurement periods (mp) +goflux_total_mp <- nrow(goflux_best_n2o_flux_df) +print(paste0(" number of mp: ", goflux_total_mp)) -measurement_periods_total <- nrow(goflux_best_n2o_flux_df) -print(paste0("number of mp: ", measurement_periods_total)) +# output summary of DF to screen str(goflux_best_n2o_flux_df) -# run this for all measurement campaigns +print("") + +# create new DF with selected output variables goflux_data_metadata_df <- data.frame( - datetime = as.POSIXct(rep("1000-01-01 00:00:00", measurement_periods_total), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ), - meas_ID = rep(NA, measurement_periods_total), - plot_no = rep(NA, measurement_periods_total), - micro_habitat = rep(NA, measurement_periods_total), - par = 
rep(NA, measurement_periods_total), - light_dark = rep(NA, measurement_periods_total), - soil_moisture_12cm = rep(NA, measurement_periods_total), - soil_moisture_30cm = rep(NA, measurement_periods_total), - soil_temperature_15cm_s1 = rep(NA, measurement_periods_total), - soil_temperature_15cm_s2 = rep(NA, measurement_periods_total), - soil_temperature_15cm_s3 = rep(NA, measurement_periods_total), - soil_temperature_15cm_s4 = rep(NA, measurement_periods_total), - ch4_µmolm2sec1 = rep(NA, measurement_periods_total), - co2_µmolm2sec1 = rep(NA, measurement_periods_total), - n2o_nmolm2sec1 = rep(NA, measurement_periods_total), - n2o_LMFlux = rep(NA, measurement_periods_total), - n2o_HMFlux = rep(NA, measurement_periods_total), - n2o_bestFlux = rep(NA, measurement_periods_total), - n2o_bestModel = rep(NA, measurement_periods_total) + datetime = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ), + meas_ID = rep(NA, goflux_total_mp), + plot_no = rep(NA, goflux_total_mp), + micro_habitat = rep(NA, goflux_total_mp), + par = rep(NA, goflux_total_mp), + light_dark = rep(NA, goflux_total_mp), + soil_moisture_12cm = rep(NA, goflux_total_mp), + soil_moisture_30cm = rep(NA, goflux_total_mp), + soil_temperature_15cm_s1 = rep(NA, goflux_total_mp), + soil_temperature_15cm_s2 = rep(NA, goflux_total_mp), + soil_temperature_15cm_s3 = rep(NA, goflux_total_mp), + soil_temperature_15cm_s4 = rep(NA, goflux_total_mp), + ch4_µmolm2sec1 = rep(NA, goflux_total_mp), + co2_µmolm2sec1 = rep(NA, goflux_total_mp), + n2o_nmolm2sec1 = rep(NA, goflux_total_mp), + n2o_LMFlux = rep(NA, goflux_total_mp), + n2o_HMFlux = rep(NA, goflux_total_mp), + n2o_bestFlux = rep(NA, goflux_total_mp), + n2o_bestModel = rep(NA, goflux_total_mp) ) -# fill in values from measPeriodMetaDataDF (same amount of rows) and -# goflux_best_n2o_flux_df with best flux = N2O nmol -# remove invalid mps -measPeriodMetaDataDF <- measPeriodMetaDataDF %>% - filter(is_mp_valid == 1) - -# loop 
over all mp provided by goflux package output individually -for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { - #print(gf_meas_ID) - # get the rows corresponding with the gf_meas_ID - line_Num_gf_results_df <- which(goflux_best_n2o_flux_df$UniqueID == gf_meas_ID) - line_Num_metadata_df <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID) - - # choose only n2o mps measured with Aeris gas analyser - # if(measPeriodMetaDataDF$is_n2o_aeris[line_Num_metadata_df] == 1){ - - goflux_data_metadata_df$datetime[line_Num_gf_results_df] <- measPeriodMetaDataDF$datetime_mp_first[line_Num_metadata_df] # datetime from measurement start - #goflux_data_metadata_df$datetime[line_Num_gf_results_df] <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[line_Num_metadata_df], format = "%Y-%m-%d %H:%M:%S", tz="UTC") - goflux_data_metadata_df$meas_ID[line_Num_gf_results_df] <- measPeriodMetaDataDF$meas_ID[line_Num_metadata_df] - goflux_data_metadata_df$plot_no[line_Num_gf_results_df] <- measPeriodMetaDataDF$plot_no[line_Num_metadata_df] - goflux_data_metadata_df$micro_habitat[line_Num_gf_results_df] <- measPeriodMetaDataDF$micro_habitat[line_Num_metadata_df] - goflux_data_metadata_df$light_dark[line_Num_gf_results_df] <- measPeriodMetaDataDF$light_dark[line_Num_metadata_df] - goflux_data_metadata_df$n2o_nmolm2sec1[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_LMFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$LM.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_HMFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$HM.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_bestFlux[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$best.flux[line_Num_metadata_df] - goflux_data_metadata_df$n2o_bestModel[line_Num_gf_results_df] <- goflux_best_n2o_flux_df$model[line_Num_metadata_df] -} +# # remove invalid mp from metDataDF +# measPeriodMetaDataDF <- measPeriodMetaDataDF %>% +# filter(is_mp_valid == 1) -# 
fill in values from ppN2OdataDF (differing amount of rows) -# loop over all mp provided by gas_fluxes pkg output individually -for (gf_meas_ID in goflux_best_n2o_flux_df$UniqueID) { - line_Num_metadata_df <- which(measPeriodMetaDataDF$meas_ID == gf_meas_ID) +# fill the 'goflux_data_metadata_df' with data from three different DF +# goflux_best_n2o_flux_df << goFlux +# measPeriodMetaDataDF << metaData +# ppN2OdataDF << environmental data +# +# loop over all mp provided by goflux-package output +for (goflux_row in 1:goflux_total_mp) { + # get and output UniqueID + gf_uID <- goflux_best_n2o_flux_df$UniqueID[goflux_row] + #print(gf_uID) + + # get the rows of goFlux output and metDataDF corresponding with the gf_uID + rowNum_goFluxDF <- goflux_row + rowNum_metaDataDF <- which(measPeriodMetaDataDF$meas_ID == gf_uID) + rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID == gf_uID) + + # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF + if (! length(rowNum_metaDataDF) == 1) stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) + if (! 
length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) - # choose only n2o mps measured with Aeris gas analyser - if(measPeriodMetaDataDF$is_n2o_aeris[line_Num_metadata_df] == 1){ - - # get the rows (with all measurements) corresponding with the gf_meas_ID - row_target <- which(goflux_data_metadata_df$meas_ID == gf_meas_ID) - # docu - rows_source <- which(ppN2OdataDF$meas_ID == gf_meas_ID) - - # for all these below: we need the average for the whole measPeriod - # ideally just some time in the middle of the measurement (think about it) - goflux_data_metadata_df$par[row_target] <- median(ppN2OdataDF$par[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_moisture_12cm[row_target] <- median(ppN2OdataDF$vwc12[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_moisture_30cm[row_target] <- median(ppN2OdataDF$vwc30[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s1[row_target] <- median(ppN2OdataDF$soilT1degC[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s2[row_target] <- median(ppN2OdataDF$soilT2degC[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s3[row_target] <- median(ppN2OdataDF$soilT3degC[rows_source], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s4[row_target] <- median(ppN2OdataDF$soilT4degC[rows_source], na.rm = FALSE) - } -} + # goflux_best_n2o_flux_df << goFlux + goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row] <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_LMFlux[goflux_row] <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_HMFlux[goflux_row] <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_bestFlux[goflux_row] <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_bestModel[goflux_row] <- 
goflux_best_n2o_flux_df$model[rowNum_goFluxDF] + + # measPeriodMetaDataDF << metaData + goflux_data_metadata_df$datetime[goflux_row] <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start + #goflux_data_metadata_df$datetime[goflux_row] <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF], format = "%Y-%m-%d %H:%M:%S", tz="UTC") + goflux_data_metadata_df$meas_ID[goflux_row] <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF] + goflux_data_metadata_df$plot_no[goflux_row] <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF] + goflux_data_metadata_df$micro_habitat[goflux_row] <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF] + goflux_data_metadata_df$light_dark[goflux_row] <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF] + + # ppN2OdataDF << environmental data + goflux_data_metadata_df$par[goflux_row] <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_moisture_12cm[goflux_row] <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_moisture_30cm[goflux_row] <- median(ppN2OdataDF$vwc30[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_temperature_15cm_s1[goflux_row] <- median(ppN2OdataDF$soilT1degC[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_temperature_15cm_s2[goflux_row] <- median(ppN2OdataDF$soilT2degC[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_temperature_15cm_s3[goflux_row] <- median(ppN2OdataDF$soilT3degC[rowNum_ppN2OdataDF], na.rm = FALSE) + goflux_data_metadata_df$soil_temperature_15cm_s4[goflux_row] <- median(ppN2OdataDF$soilT4degC[rowNum_ppN2OdataDF], na.rm = FALSE) -# check if it works for all measCamp -# # loop over all mps -# for (iLine in 1:nrow(measPeriodMetaDataDF)){ -# # choose only n2o mps measured with Aeris gas analyser -# if(measPeriodMetaDataDF$is_n2o_aeris[iLine] == 1){ -# -# # get the meas_ID, ID or the mp, of this line -# meas_ID <- 
measPeriodMetaDataDF$meas_ID[iLine] -# # get the rows (with all measurements) corresponding with the meas_ID -# row_target <- which(goflux_data_metadata_df$meas_ID == meas_ID) -# # docu -# rows_source <- which(measPeriodN2o$meas_ID == meas_ID) -# -# # for all these below: we need the average for the whole measPeriod -# # ideally just some time in the middle of the measurement (think about it) -# goflux_data_metadata_df$par[row_target] <- median(measPeriodN2o$par[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_moisture_12cm[row_target] <- median(measPeriodN2o$vwc12[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_moisture_30cm[row_target] <- median(measPeriodN2o$vwc30[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_temperature_15cm_s1[row_target] <- median(measPeriodN2o$soilT1degC[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_temperature_15cm_s2[row_target] <- median(measPeriodN2o$soilT2degC[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_temperature_15cm_s3[row_target] <- median(measPeriodN2o$soilT3degC[rows_source], na.rm = FALSE) -# goflux_data_metadata_df$soil_temperature_15cm_s4[row_target] <- median(measPeriodN2o$soilT4degC[rows_source], na.rm = FALSE) -# } -# } +} # write DF goflux_data_metadata_df write.table(goflux_data_metadata_df, @@ -148,9 +118,9 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita goflux_data_metadata_df <- goflux_data_metadata_df %>% filter(n2o_nmolm2sec1 > -500) # remove any line that contains NA -NAs <- complete.cases(goflux_data_metadata_df) +rowContainsNoNA <- complete.cases(goflux_data_metadata_df) goflux_data_metadata_df <- goflux_data_metadata_df %>% - filter(!NAs) + filter(rowContainsNoNA) # create new DF - one per measCampaign -- GitLab From 8af9c3ec308684ef96a2d1d0692c8e0369a67b12 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 16:38:32 +0100 Subject: [PATCH 12/24] minor additional 
changes in creating the results DF --- .../combine_all_data_and_meta_data.R | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index ea06821..2b7dd10 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -23,25 +23,28 @@ print("") # create new DF with selected output variables goflux_data_metadata_df <- data.frame( + # metaData datetime = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ), meas_ID = rep(NA, goflux_total_mp), plot_no = rep(NA, goflux_total_mp), micro_habitat = rep(NA, goflux_total_mp), - par = rep(NA, goflux_total_mp), light_dark = rep(NA, goflux_total_mp), + # goFlux + #ch4_mumolm2sec1 = rep(NA, goflux_total_mp), # TODO + #co2_mumolm2sec1 = rep(NA, goflux_total_mp), # TODO + n2o_nmolm2sec1 = rep(NA, goflux_total_mp), + n2o_LMFlux = rep(NA, goflux_total_mp), + n2o_HMFlux = rep(NA, goflux_total_mp), + n2o_bestFlux = rep(NA, goflux_total_mp), + n2o_bestModel = rep(NA, goflux_total_mp) + # environmental data + par = rep(NA, goflux_total_mp), soil_moisture_12cm = rep(NA, goflux_total_mp), soil_moisture_30cm = rep(NA, goflux_total_mp), soil_temperature_15cm_s1 = rep(NA, goflux_total_mp), soil_temperature_15cm_s2 = rep(NA, goflux_total_mp), soil_temperature_15cm_s3 = rep(NA, goflux_total_mp), soil_temperature_15cm_s4 = rep(NA, goflux_total_mp), - ch4_µmolm2sec1 = rep(NA, goflux_total_mp), - co2_µmolm2sec1 = rep(NA, goflux_total_mp), - n2o_nmolm2sec1 = rep(NA, goflux_total_mp), - n2o_LMFlux = rep(NA, goflux_total_mp), - n2o_HMFlux = rep(NA, goflux_total_mp), - n2o_bestFlux = rep(NA, goflux_total_mp), - n2o_bestModel = rep(NA, goflux_total_mp) ) # # remove invalid mp from metDataDF @@ -67,22 +70,21 @@ for (goflux_row in 1:goflux_total_mp) { # test if meas_ID does exist (only once) 
in metaDataDF & ppN2OdataDF if (! length(rowNum_metaDataDF) == 1) stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) if (! length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) - - # goflux_best_n2o_flux_df << goFlux - goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row] <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] - goflux_data_metadata_df$n2o_LMFlux[goflux_row] <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF] - goflux_data_metadata_df$n2o_HMFlux[goflux_row] <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF] - goflux_data_metadata_df$n2o_bestFlux[goflux_row] <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] - goflux_data_metadata_df$n2o_bestModel[goflux_row] <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF] # measPeriodMetaDataDF << metaData goflux_data_metadata_df$datetime[goflux_row] <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start - #goflux_data_metadata_df$datetime[goflux_row] <- as.POSIXct(measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF], format = "%Y-%m-%d %H:%M:%S", tz="UTC") goflux_data_metadata_df$meas_ID[goflux_row] <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF] goflux_data_metadata_df$plot_no[goflux_row] <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF] goflux_data_metadata_df$micro_habitat[goflux_row] <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF] goflux_data_metadata_df$light_dark[goflux_row] <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF] + # goflux_best_n2o_flux_df << goFlux + goflux_data_metadata_df$n2o_nmolm2sec1[goflux_row] <- goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_LMFlux[goflux_row] <- goflux_best_n2o_flux_df$LM.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_HMFlux[goflux_row] <- goflux_best_n2o_flux_df$HM.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_bestFlux[goflux_row] <- 
goflux_best_n2o_flux_df$best.flux[rowNum_goFluxDF] + goflux_data_metadata_df$n2o_bestModel[goflux_row] <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF] + # ppN2OdataDF << environmental data goflux_data_metadata_df$par[goflux_row] <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE) goflux_data_metadata_df$soil_moisture_12cm[goflux_row] <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE) -- GitLab From efed52a8ed54b25e51b92d103e65b0e5655882a1 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 16:45:19 +0100 Subject: [PATCH 13/24] bugfix --- preprocessing/functions/combine_all_data_and_meta_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 2b7dd10..754cb12 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -36,7 +36,7 @@ goflux_data_metadata_df <- data.frame( n2o_LMFlux = rep(NA, goflux_total_mp), n2o_HMFlux = rep(NA, goflux_total_mp), n2o_bestFlux = rep(NA, goflux_total_mp), - n2o_bestModel = rep(NA, goflux_total_mp) + n2o_bestModel = rep(NA, goflux_total_mp), # environmental data par = rep(NA, goflux_total_mp), soil_moisture_12cm = rep(NA, goflux_total_mp), @@ -44,7 +44,7 @@ goflux_data_metadata_df <- data.frame( soil_temperature_15cm_s1 = rep(NA, goflux_total_mp), soil_temperature_15cm_s2 = rep(NA, goflux_total_mp), soil_temperature_15cm_s3 = rep(NA, goflux_total_mp), - soil_temperature_15cm_s4 = rep(NA, goflux_total_mp), + soil_temperature_15cm_s4 = rep(NA, goflux_total_mp) ) # # remove invalid mp from metDataDF -- GitLab From c336cdc8d4c4bdd6e065c7c30397ea9f26b99e23 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Wed, 21 Feb 2024 16:54:52 +0100 Subject: [PATCH 14/24] bugfix --- preprocessing/functions/combine_all_data_and_meta_data.R | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 754cb12..ad06edf 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -68,8 +68,8 @@ for (goflux_row in 1:goflux_total_mp) { rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID == gf_uID) # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF - if (! length(rowNum_metaDataDF) == 1) stop(paste0("ERROR no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) - if (! length(rowNum_ppN2OdataDF) == 1) stop(paste0("ERROR no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) + if (! length(rowNum_metaDataDF) == 1) stop(paste0("ERROR! no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) + if (length(rowNum_ppN2OdataDF) < 1) stop(paste0("ERROR! no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) # measPeriodMetaDataDF << metaData goflux_data_metadata_df$datetime[goflux_row] <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start -- GitLab From 4172d2cc3f702c9c73d5426de66d0f00d1689649 Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Thu, 22 Feb 2024 07:59:57 +0200 Subject: [PATCH 15/24] start visualise fluxes * noticed two different .csv files, trying to figure out which one is the correct one AND contains info about measCamp (May, July, Sept) * currently, the one containing info about measCamp has 400 less.. 
--- .../functions/visualise_fluxes_from_goflux.R | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R index b319355..7f62485 100644 --- a/preprocessing/functions/visualise_fluxes_from_goflux.R +++ b/preprocessing/functions/visualise_fluxes_from_goflux.R @@ -3,6 +3,26 @@ # # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R' # +# tests 22 Feb 2024 +goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv") + +# library(dplyr) +# library(tidyverse) +# measCampSpring <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-05")) %>% +# mutate(measCamp="May") +# +# measCampSummer <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-07")) %>% +# mutate(measCamp="July") +# +# measCampAutumn <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-09")) %>% +# mutate(measCamp="September") +# +# # rejoin +# test <- measCampSpring +# test <- rbind(test, measCampSummer, measCampAutumn) # R base plots --------------------------------------------------------------- -- GitLab From 4197156fae0b780b6c7836ece7003be1a34bb708 Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Thu, 22 Feb 2024 10:14:34 +0200 Subject: [PATCH 16/24] continue visualising fluxes * for now, I worked with the (...)_corr.csv df with only ~ 640 obs * I will wait with testing the GC comparison and running all measCamp with shorter chamber closure times * next steps: make sure that code to combine dfs is really working and giving me all my obs --- .../functions/visualise_fluxes_from_goflux.R | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R 
b/preprocessing/functions/visualise_fluxes_from_goflux.R index 7f62485..1978c8a 100644 --- a/preprocessing/functions/visualise_fluxes_from_goflux.R +++ b/preprocessing/functions/visualise_fluxes_from_goflux.R @@ -4,7 +4,9 @@ # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R' # # tests 22 Feb 2024 -goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv") +goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata_corr.csv") +str(goflux_data_metadata_df) +goflux_data_metadata_df$datetime <- as.POSIXct(goflux_data_metadata_df$datetime) # library(dplyr) # library(tidyverse) @@ -173,8 +175,8 @@ n2o_season_palsalichen <- goflux_data_metadata_df %>% labs( title = "Palsa lichen") + theme_bw() + - facet_wrap(vars(measCamp), scales = "free") + - My_Theme + facet_wrap(vars(measCamp), scales = "free_x") + #My_Theme n2o_season_palsalichen # palsa moss 8#### @@ -193,11 +195,11 @@ n2o_season_pm8 <- goflux_data_metadata_df %>% labs( title = "Palsa moss") + theme_bw() + - facet_wrap(vars(measCamp), scales = "free") + - My_Theme + facet_wrap(vars(measCamp), scales = "free_x") + # My_Theme n2o_season_pm8 -# palsa_moss -8 #### +# palsa_moss -8 ---- n2o_season_palsamoss <- goflux_data_metadata_df %>% filter(micro_habitat == "palsa_moss") %>% filter(plot_no != 8) %>% @@ -213,11 +215,11 @@ n2o_season_palsamoss <- goflux_data_metadata_df %>% labs( title = "Palsa moss without plot 8") + theme_bw() + - facet_wrap(vars(measCamp), scales = "free") + - My_Theme + facet_wrap(vars(measCamp), scales = "free_x") + #My_Theme n2o_season_palsamoss -# N2O fluxes over season: bog plots +# N2O fluxes over season: bog plots ---- n2o_season_bog <- goflux_data_metadata_df %>% filter(micro_habitat == "bog") %>% mutate(measCamp = 
factor(measCamp, levels = c("May","July","September"))) %>% @@ -232,11 +234,11 @@ n2o_season_bog <- goflux_data_metadata_df %>% labs( title = "Bog") + theme_bw() + - facet_wrap(vars(measCamp), scales = "free") + - My_Theme + facet_wrap(vars(measCamp), scales = "free_x") + #My_Theme n2o_season_bog -# N2O fluxes over season: fen plots +# N2O fluxes over season: fen plots ---- n2o_season_fen <- goflux_data_metadata_df %>% filter(micro_habitat == "fen") %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% @@ -251,11 +253,18 @@ n2o_season_fen <- goflux_data_metadata_df %>% labs( title = "Fen") + theme_bw() + - facet_wrap(vars(measCamp), scales = "free") + - My_Theme + facet_wrap(vars(measCamp), scales = "free_x") + # + # My_Theme n2o_season_fen -# Boxplots #### +# install.packages("gridExtra") +library(gridExtra) + +grid.arrange(n2o_season_fen, n2o_season_bog, n2o_season_palsamoss, n2o_season_palsalichen) + + +# BoxplogridExtra# Boxplots #### # L + D #### dark_light_season <- goflux_data_metadata_df %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% -- GitLab From 398ca5bf60435af850c0e48bc42f673069a4cc6b Mon Sep 17 00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Thu, 22 Feb 2024 13:20:00 +0200 Subject: [PATCH 17/24] fix measCamp issue in visualise_fluxes * I just added the rows in simple base R to denote the measurement camp --> no splitting and merging needed * with this, I created a lot of plots and did some first correlation plots, too * also some stats * next steps: see if GC simulation runs --- .../functions/visualise_fluxes_from_goflux.R | 189 ++++++++++++++---- 1 file changed, 147 insertions(+), 42 deletions(-) diff --git a/preprocessing/functions/visualise_fluxes_from_goflux.R b/preprocessing/functions/visualise_fluxes_from_goflux.R index 1978c8a..a340000 100644 --- a/preprocessing/functions/visualise_fluxes_from_goflux.R +++ b/preprocessing/functions/visualise_fluxes_from_goflux.R 
@@ -3,28 +3,78 @@ # # input DF goflux_data_metadata_df provided by 'combine_all_data_and_meta_data.R' # -# tests 22 Feb 2024 -goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata_corr.csv") -str(goflux_data_metadata_df) +# read 22 Feb 2024 ---- +goflux_data_metadata_df <- read.csv("/home/ntriches/Nextcloud/aniwa/01_qarctic/data_analysis/output/flux_analyses/all/20240221_182509_202305_202307_202309_goflux_n2o_data_metadata.csv") +# add empty column to data frame +goflux_data_metadata_df$measCamp <- NA +# add measCamp using row numbers +goflux_data_metadata_df$measCamp[1:662] <- "May" +goflux_data_metadata_df$measCamp[663:789] <- "July" +goflux_data_metadata_df$measCamp[790:1000] <- "September" +# change datetime to POSIXct goflux_data_metadata_df$datetime <- as.POSIXct(goflux_data_metadata_df$datetime) +# remove white space from micro habitats +goflux_data_metadata_df$micro_habitat <- gsub(" ", "", goflux_data_metadata_df$micro_habitat) +# remove white space from light_dark +goflux_data_metadata_df$light_dark <- gsub(" ", "", goflux_data_metadata_df$light_dark) +# change dak to dark +goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="dak")] <- "dark" +# change palsa-moss to palsa_moss +goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss" +# look at structure +str(goflux_data_metadata_df) -# library(dplyr) -# library(tidyverse) -# measCampSpring <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-05")) %>% -# mutate(measCamp="May") -# -# measCampSummer <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-07")) %>% -# mutate(measCamp="July") -# -# measCampAutumn <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-09")) %>% -# mutate(measCamp="September") -# -# # rejoin -# test <- measCampSpring -# test <- 
rbind(test, measCampSummer, measCampAutumn) +# STATS --------------- +# standard correlation plots ---- +#install.packages("GGally") +library(GGally) +# create subset of df +subset <- goflux_data_metadata_df %>% + select(datetime, plot_no, micro_habitat, + n2o_nmolm2sec1, par, light_dark, + soil_moisture_12cm, soil_moisture_30cm, + soil_temperature_15cm_s1) +# create correlation matrix +plot <- subset %>% + ggpairs(mapping = aes(col = light_dark, alpha = 0.3), + lower = list(combo = wrap("facethist", bins = 20))) +plot + +# create even smaller subset +subsubset <- subset %>% + select(n2o_nmolm2sec1, light_dark, micro_habitat, + soil_moisture_12cm, soil_moisture_30cm, + soil_temperature_15cm_s1) +# create correlation matrix +subplot <- subsubset %>% + ggpairs(mapping = aes(col = light_dark, alpha = 0.3), + lower = list(combo = wrap("facethist", bins = 20))) +subplot + +# fancier correlation plots ---- +install.packages("corrplot") +library(corrplot) +# create subset of goflux_metadata_df with only numerical values +num_values <- goflux_data_metadata_df %>% + select(plot_no, n2o_nmolm2sec1, par, soil_moisture_12cm, soil_moisture_30cm, + soil_temperature_15cm_s1) +# calculate the correlation matrix and round it +cor_matrix <- cor(num_values) +corrplot(cor_matrix, method="circle", type = "upper", cl.pos = "b", tl.pos = "d", tl.cex = 0.6) +# nonsense for my data ? 
+ +# glimpse ---- +library(tidyr) +# use gather() to gather columns into key-value pairs and then glimpse() at the resulting data +# draw a bar plot of each variable +gather(goflux_data_metadata_df) %>% ggplot(aes(value)) + facet_wrap("key", scales = "free") + geom_bar() +# not super useful in my case + +# produce summary statistics by group +summary_stats <- goflux_data_metadata_df %>% + group_by(micro_habitat, light_dark) %>% + summarise(count = n(), mean_grade = mean(n2o_nmolm2sec1)) +summary_stats # R base plots --------------------------------------------------------------- @@ -127,9 +177,10 @@ symlog_trans <- function(base = 10, thr = 1, scale = 1){ n2o_fluxes_seasons <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% - ggplot(aes(x = datetime, y = n2o_nmolm2sec1, )) + + mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>% + ggplot(aes(x = datetime, y = n2o_nmolm2sec1, colour = micro_habitat)) + geom_point(size = 2) + - scale_colour_manual(values = c("black", "orange")) + + scale_colour_manual(values = c("darkblue", "blue", "orange", "yellow")) + geom_smooth(method = "glm", linewidth = 0.5, #formula = y ~ x, colour = "blue") + #ylim(-50,150) + @@ -141,7 +192,7 @@ n2o_fluxes_seasons <- goflux_data_metadata_df %>% My_Theme n2o_fluxes_seasons -# L + D -8 #### +# LIGHT + DARK -8 #### n2o_fluxes_seasons2 <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% @@ -264,7 +315,7 @@ library(gridExtra) grid.arrange(n2o_season_fen, n2o_season_bog, n2o_season_palsamoss, n2o_season_palsalichen) -# BoxplogridExtra# Boxplots #### +# boxplots #### # L + D #### dark_light_season <- goflux_data_metadata_df %>% mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% @@ -309,27 +360,32 @@ variability_transects # Spatial variability per measCamp#### 
variability_measCamp <- goflux_data_metadata_df %>% filter(plot_no != 8) %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>% ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + geom_boxplot() + geom_jitter(aes(colour = micro_habitat)) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) + + #scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + labs(title = "Light and dark measurements without plot 8") + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + xlab("") + - facet_wrap(vars(measCamp)) + - My_Theme + facet_wrap(vars(measCamp)) + #My_Theme # ylim(-0.001, 0.001) + # scale_y_continuous(trans ="symlog",limits=c(0,0.001)) variability_measCamp # L per micro habitat #### light <- goflux_data_metadata_df %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>% filter(plot_no != 8) %>% filter(light_dark == "light") %>% ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + geom_boxplot() + geom_jitter(aes(colour = micro_habitat)) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) + labs(title = "Light measurements without plot 8") + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + My_Theme @@ -339,12 +395,14 @@ light # D per micro habitat #### dark <- goflux_data_metadata_df %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + mutate(micro_habitat = factor(micro_habitat, levels = c("palsa_lichen", "palsa_moss", "bog", "fen"))) %>% filter(plot_no != 8) %>% filter(light_dark == "dark") %>% 
ggplot(aes(micro_habitat, n2o_nmolm2sec1)) + geom_boxplot() + geom_jitter(aes(colour = micro_habitat)) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) + labs(title = "Dark measurements without plot 8") + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + My_Theme @@ -352,35 +410,58 @@ dark <- goflux_data_metadata_df %>% # scale_y_continuous(trans ="symlog",limits=c(0,0.001)) dark +# Soil T + M --------------------------------------------- +tests <- goflux_data_metadata_df %>% + filter(measCamp == "May") %>% + group_by(plot_no) %>% + summarise(mean_flux = mean(n2o_nmolm2sec1)) %>% + ggplot(aes(x = plot_no, y = mean_flux)) + + geom_point() +tests + + +df %>% + group_by(age) %>% + summarise(mean_score = mean(score)) %>% + ggplot(aes(x = factor(age), y = mean_score)) + + geom_col() + + labs(x = "Age", y = "Mean score") + + + # Soil temperature #### -# Soil T4 overall #### +# Soil T4 overall per measCamp #### n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + #mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>% filter(plot_no != 8) %>% - ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat)) + + ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1)) + geom_point(size = 2) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + geom_smooth(method = "glm", linewidth = 0.5, #formula = y ~ x, colour = "blue") + #ylim(-0.001, 0.001) + xlab("Soil temperature (°C) in 15 cm depth") + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + theme_bw() + + facet_wrap(vars(measCamp), scales = "free_x") + My_Theme n2o_fluxes_soiltemp4 # Soil T4 per micro habitat#### n2o_fluxes_soiltemp4 <- goflux_data_metadata_df %>% + mutate(measCamp 
= factor(measCamp, levels = c("May","July","September"))) %>% + mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>% filter(plot_no != 8) %>% - ggplot( aes(x = soil_temperature_15cm_s1, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) + + ggplot( aes(x = soil_temperature_15cm_s4, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) + geom_point(size = 2) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) + geom_smooth(method = "glm", linewidth = 0.5, #formula = y ~ x, colour = "blue") + #ylim(-0.001, 0.001) + xlab("Soil temperature (°C) in 15 cm depth") + - ylab(expression('N'['2']*'O' * ' flux ('* 'mg' ~N[2]*O-N~ m^-2~h^-1*')')) + + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + theme_bw() + - facet_wrap(vars(micro_habitat)) + + facet_wrap(vars(measCamp), scales = "free_x") + My_Theme n2o_fluxes_soiltemp4 @@ -388,10 +469,12 @@ n2o_fluxes_soiltemp4 # Soil moisture #### # Soil M 12cm #### n2o_fluxes_soilmoist_12cm <- goflux_data_metadata_df %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>% filter(plot_no != 8) %>% - ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat)) + + ggplot( aes(x = soil_moisture_12cm, y = n2o_nmolm2sec1, colour = micro_habitat, shape = micro_habitat)) + geom_point(size = 2) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) + geom_smooth(method = "glm", linewidth = 0.5, #formula = y ~ x, colour = "blue") + #ylim(-0.001, 0.001) + @@ -404,12 +487,14 @@ n2o_fluxes_soilmoist_12cm # Soil M 30 cm #### n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>% + 
mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + mutate(measCamp = factor(micro_habitat, levels = c("fen", "bog","palsa_moss","palsa_lichen"))) %>% filter(plot_no != 8) %>% - filter(soil_moisture_30cm < 2000) %>% # remove some weird error values - filter(soil_moisture_30cm > 0) %>% # remove 0s - ggplot( aes(x = soil_moisture_30cm, y = n2o_nmolm2sec1, colour = micro_habitat)) + + #filter(soil_moisture_30cm < 2000) %>% # remove some weird error values + #filter(soil_moisture_30cm > 0) %>% # remove 0s + ggplot( aes(x = soil_moisture_30cm, y = n2o_nmolm2sec1, colour = micro_habitat, shape=micro_habitat)) + geom_point(size = 2) + - scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + scale_colour_manual(values = c("blue", "darkblue", "yellow", "orange")) + geom_smooth(method = "glm", linewidth = 0.5, #formula = y ~ x, colour = "blue") + #ylim(-0.001, 0.001) + @@ -420,3 +505,23 @@ n2o_fluxes_soilmoist_30cm <- goflux_data_metadata_df %>% My_Theme n2o_fluxes_soilmoist_30cm +# Spatial variability within micro habitat ------- +# Spatial variability per measCamp#### +variability_fen <- goflux_data_metadata_df %>% + filter(light_dark == "light") %>% + filter(micro_habitat == "fen") %>% + mutate(measCamp = factor(measCamp, levels = c("May","July","September"))) %>% + ggplot(aes(x = plot_no, y = n2o_nmolm2sec1, group = plot_no)) + + geom_boxplot() + + #geom_jitter(aes(colour = micro_habitat)) + + #scale_colour_manual(values = c("yellow","orange","skyblue", "blue")) + + #scale_colour_manual(values = c("sienna", "sienna1", "skyblue", "slateblue1")) + + labs(title = "Variability of light measurements within fen plots") + + ylab(expression('N'['2']*'O' * ' flux ('* 'nmol' ~N[2]*O-N~ m^-2~s^-1*')')) + + xlab("") + + facet_wrap(vars(measCamp)) + + My_Theme +# ylim(-0.001, 0.001) + +# scale_y_continuous(trans ="symlog",limits=c(0,0.001)) +variability_fen + -- GitLab From e2cd1e521c0ecd736ff0ace53c6d1deff1896031 Mon Sep 17 
00:00:00 2001 From: Nathalie Triches <ntriches@bgc-jena.mpg.de> Date: Thu, 22 Feb 2024 16:58:14 +0200 Subject: [PATCH 18/24] test GCsimulation script: working! * GC simulation script is working well :D * next steps (as discussed on BBB): Jan will clean the combine_all_data_and_meta_data.R script to make sure not too many mps are kicked out; take the last reading from the soil T and soil M sensors as single average for these variables, and merge the branch --- .../combine_all_data_and_meta_data.R | 58 +++++++++---------- preprocessing/run_all_main.R | 2 +- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index ad06edf..d58ec60 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -116,35 +116,35 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da # change palsa-moss to palsa_moss goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] <- "palsa_moss" -# remove values below -1000 -goflux_data_metadata_df <- goflux_data_metadata_df %>% - filter(n2o_nmolm2sec1 > -500) -# remove any line that contains NA -rowContainsNoNA <- complete.cases(goflux_data_metadata_df) -goflux_data_metadata_df <- goflux_data_metadata_df %>% - filter(rowContainsNoNA) - - -# create new DF - one per measCampaign -# needs 'stringr' library -# selection based on 'datetime' column containing "year-month" -# and replace 'measCamp' column values with a string of the name of the month - -measCampSpring <- goflux_data_metadata_df %>% - filter(str_detect(datetime, "2023-05")) %>% - mutate(measCamp="May") - -measCampSummer <- goflux_data_metadata_df %>% - filter(str_detect(datetime, "2023-07")) %>% - mutate(measCamp="July") - -measCampAutumn <- goflux_data_metadata_df %>% - filter(str_detect(datetime, "2023-09")) %>% - 
mutate(measCamp="September") - -# combine data frames from all measurement campaigns to one data set -goflux_data_metadata_df <- measCampSpring -goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn) +# # remove values below -1000 +# goflux_data_metadata_df <- goflux_data_metadata_df %>% +# filter(n2o_nmolm2sec1 > -500) +# # remove any line that contains NA +# rowContainsNoNA <- complete.cases(goflux_data_metadata_df) +# goflux_data_metadata_df <- goflux_data_metadata_df %>% +# filter(rowContainsNoNA) +# +# +# # create new DF - one per measCampaign +# # needs 'stringr' library +# # selection based on 'datetime' column containing "year-month" +# # and replace 'measCamp' column values with a string of the name of the month +# +# measCampSpring <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-05")) %>% +# mutate(measCamp="May") +# +# measCampSummer <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-07")) %>% +# mutate(measCamp="July") +# +# measCampAutumn <- goflux_data_metadata_df %>% +# filter(str_detect(datetime, "2023-09")) %>% +# mutate(measCamp="September") +# +# # combine data frames from all measurement campaigns to one data set +# goflux_data_metadata_df <- measCampSpring +# goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn) # write corrected DF goflux_data_metadata_df write.table(goflux_data_metadata_df, diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index ae47d21..05b5bb5 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -32,7 +32,7 @@ process_aeris_raw_data <- "F" create_plots_ghg_conc <- "T" # use the PGA data to simulate GC data, and run only specific filter/correction functions -simulateGCdata <- "F" # [F,T] +simulateGCdata <- "T" # [F,T] # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R' updateGoFluxPkg <- "T" # [F,T] -- GitLab From 
fb81a1ffaecccdb616d0294eee1dcab00a023859 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 19:54:51 +0100 Subject: [PATCH 19/24] minor cleanup, changes and prep. for upcomming changes --- .../calculate_fluxes_with_goflux_package.R | 3 +- .../combine_all_data_and_meta_data.R | 30 ------------------- preprocessing/run_all_main.R | 5 ++-- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index 0d10072..bf6dcf3 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -23,8 +23,7 @@ goflux_best_n2o_flux_df <- best.flux(flux.result = n2o_flux_result_df, g.limit # plot results of best fluxes plot_list <- flux.plot(flux.results = goflux_best_n2o_flux_df, dataframe = goflux_input_n2o_df, gastype = "N2Odry_ppb", shoulder = 20, - plot.legend = c("RMSE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot - #plot.legend = c("MAE", "RMSE", "AICc", "SE", "r2", "g.factor"), + plot.legend = c("MAE", "AICc", "SE", "r2", "g.factor"), # a maximum of 5 additional parameters can be displayed above the plot best.model = TRUE) # save output file as pdf - figures (this function does save files to the 'present working dir' - hence, changing into 'outDir') diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index d58ec60..dd5acbb 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -116,36 +116,6 @@ goflux_data_metadata_df$light_dark[which(goflux_data_metadata_df$light_dark=="da # change palsa-moss to palsa_moss goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habitat=="palsa-moss")] 
<- "palsa_moss" -# # remove values below -1000 -# goflux_data_metadata_df <- goflux_data_metadata_df %>% -# filter(n2o_nmolm2sec1 > -500) -# # remove any line that contains NA -# rowContainsNoNA <- complete.cases(goflux_data_metadata_df) -# goflux_data_metadata_df <- goflux_data_metadata_df %>% -# filter(rowContainsNoNA) -# -# -# # create new DF - one per measCampaign -# # needs 'stringr' library -# # selection based on 'datetime' column containing "year-month" -# # and replace 'measCamp' column values with a string of the name of the month -# -# measCampSpring <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-05")) %>% -# mutate(measCamp="May") -# -# measCampSummer <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-07")) %>% -# mutate(measCamp="July") -# -# measCampAutumn <- goflux_data_metadata_df %>% -# filter(str_detect(datetime, "2023-09")) %>% -# mutate(measCamp="September") -# -# # combine data frames from all measurement campaigns to one data set -# goflux_data_metadata_df <- measCampSpring -# goflux_data_metadata_df <- rbind(goflux_data_metadata_df, measCampSummer, measCampAutumn) - # write corrected DF goflux_data_metadata_df write.table(goflux_data_metadata_df, paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"), diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 05b5bb5..606d951 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -13,7 +13,8 @@ # 1. 
Script settings ---- # list of measurement campaigns to run [YYYYMM] -measCampList <- c("202305", "202307", "202309") +measCampList <- c("202305", "202307", "202309") +measCampMonthList <- c("May", "July", "September") # gas type to process [T,F] process_co2 <- "F" @@ -32,7 +33,7 @@ process_aeris_raw_data <- "F" create_plots_ghg_conc <- "T" # use the PGA data to simulate GC data, and run only specific filter/correction functions -simulateGCdata <- "T" # [F,T] +simulateGCdata <- "F" # [F,T] # install / update goFlux package (to ensure using the most recent version) in 'main_flux_calculation.R' updateGoFluxPkg <- "T" # [F,T] -- GitLab From 5e0008189ec28ddece23e5767360b363ccfa62de Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 20:57:34 +0100 Subject: [PATCH 20/24] modify soil moisture & temperature calculation for results DF * tested: works --- .../combine_all_data_and_meta_data.R | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index dd5acbb..a07b031 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -7,10 +7,7 @@ # output DF: goflux_data_metadata_df # # colnames(goflux_data_metadata_df) -# [1] "datetime" "meas_ID" "plot_no" "micro_habitat" "par" "light_dark" -# [7] "soil_moisture_12cm" "soil_moisture_30cm" "soil_temperature_15cm_s1" "soil_temperature_15cm_s2" "soil_temperature_15cm_s3" "soil_temperature_15cm_s4" -# [13] "ch4_µmolm2sec1" "co2_µmolm2sec1" "n2o_nmolm2sec1" "n2o_LMFlux" "n2o_HMFlux" "n2o_bestFlux" -# [19] "n2o_bestModel" "measCamp" + # number of measurement periods (mp) @@ -41,10 +38,7 @@ goflux_data_metadata_df <- data.frame( par = rep(NA, goflux_total_mp), soil_moisture_12cm = rep(NA, goflux_total_mp), soil_moisture_30cm = rep(NA, goflux_total_mp), - 
soil_temperature_15cm_s1 = rep(NA, goflux_total_mp), - soil_temperature_15cm_s2 = rep(NA, goflux_total_mp), - soil_temperature_15cm_s3 = rep(NA, goflux_total_mp), - soil_temperature_15cm_s4 = rep(NA, goflux_total_mp) + soil_temperature_15cm_sm = rep(NA, goflux_total_mp) # median of values from four sensors ) # # remove invalid mp from metDataDF @@ -64,12 +58,16 @@ for (goflux_row in 1:goflux_total_mp) { # get the rows of goFlux output and metDataDF corresponding with the gf_uID rowNum_goFluxDF <- goflux_row - rowNum_metaDataDF <- which(measPeriodMetaDataDF$meas_ID == gf_uID) - rowNum_ppN2OdataDF <- which(ppN2OdataDF$meas_ID == gf_uID) + rowNum_metaDataDF <- which(measPeriodMetaDataDF$meas_ID == gf_uID) + + # get the last x rows of the pre-processed DF that contains environmental data + allRows_ppN2OdataDF <- which(ppN2OdataDF$meas_ID == gf_uID) + numberOfLastRowsToInclude <- 10 + selRows_ppN2OdataDF <- allRows_ppN2OdataDF[(length(allRows_ppN2OdataDF) - numberOfLastRowsToInclude):length(allRows_ppN2OdataDF)] # test if meas_ID does exist (only once) in metaDataDF & ppN2OdataDF if (! length(rowNum_metaDataDF) == 1) stop(paste0("ERROR! no match of measID in metaDataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) - if (length(rowNum_ppN2OdataDF) < 1) stop(paste0("ERROR! no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) + if (length(allRows_ppN2OdataDF) < 1) stop(paste0("ERROR! 
no match of measID in ppN2OdataDF and UniqueID of goFlux, UniqueID: ", gf_uID)) # measPeriodMetaDataDF << metaData goflux_data_metadata_df$datetime[goflux_row] <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start @@ -86,14 +84,17 @@ for (goflux_row in 1:goflux_total_mp) { goflux_data_metadata_df$n2o_bestModel[goflux_row] <- goflux_best_n2o_flux_df$model[rowNum_goFluxDF] # ppN2OdataDF << environmental data - goflux_data_metadata_df$par[goflux_row] <- median(ppN2OdataDF$par[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_moisture_12cm[goflux_row] <- median(ppN2OdataDF$vwc12[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_moisture_30cm[goflux_row] <- median(ppN2OdataDF$vwc30[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s1[goflux_row] <- median(ppN2OdataDF$soilT1degC[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s2[goflux_row] <- median(ppN2OdataDF$soilT2degC[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s3[goflux_row] <- median(ppN2OdataDF$soilT3degC[rowNum_ppN2OdataDF], na.rm = FALSE) - goflux_data_metadata_df$soil_temperature_15cm_s4[goflux_row] <- median(ppN2OdataDF$soilT4degC[rowNum_ppN2OdataDF], na.rm = FALSE) - + # use all PAR values + goflux_data_metadata_df$par[goflux_row] <- median(ppN2OdataDF$par[allRows_ppN2OdataDF], na.rm = TRUE) + # use the last x values of the mp + goflux_data_metadata_df$soil_moisture_12cm[goflux_row] <- median(ppN2OdataDF$vwc12[selRows_ppN2OdataDF], na.rm = TRUE) + goflux_data_metadata_df$soil_moisture_30cm[goflux_row] <- median(ppN2OdataDF$vwc30[selRows_ppN2OdataDF], na.rm = TRUE) + # use the last x values of the mp and average across sensors + goflux_data_metadata_df$soil_temperature_15cm_sm[goflux_row] <- median(c(ppN2OdataDF$soilT1degC[selRows_ppN2OdataDF], + ppN2OdataDF$soilT2degC[selRows_ppN2OdataDF], + 
ppN2OdataDF$soilT3degC[selRows_ppN2OdataDF], + ppN2OdataDF$soilT4degC[selRows_ppN2OdataDF]), + na.rm = TRUE) } # write DF goflux_data_metadata_df -- GitLab From b73783191b73a2048469a502c117b8c39f9fe10e Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 21:04:02 +0100 Subject: [PATCH 21/24] rename four flux output file names for consistency and improved readability --- .../functions/calculate_fluxes_with_goflux_package.R | 4 ++-- preprocessing/functions/combine_all_data_and_meta_data.R | 4 ++-- preprocessing/run_all_main.R | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index bf6dcf3..c30c091 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -33,8 +33,8 @@ setwd(mainDir) # save output file from flux calculations write.table(n2o_flux_result_df, - paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_goflux_n2o_output.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "goflux_goflux_n2o_output__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") write.table(goflux_best_n2o_flux_df, - paste0(fluxResOutDir, "/measCamp_", measCampListString, "goflux_bestflux_n2o_output.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "goflux_bestflux_n2o_output__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index a07b031..29e11e7 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -99,7 +99,7 @@ for (goflux_row in 1:goflux_total_mp) { # write DF 
goflux_data_metadata_df write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ",") @@ -119,7 +119,7 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita # write corrected DF goflux_data_metadata_df write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/", currentDateTime, "_", measCampListString, "goflux_n2o_data_metadata_corr.csv"), + paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ",") diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 606d951..6b1fe80 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -48,7 +48,7 @@ updateGoFluxPkg <- "T" # [F,T] # create string with all measCamp measCampListString <- character() for (mc in measCampList){ - measCampListString <- paste0(measCampListString, mc, "_") + measCampListString <- paste0(measCampListString, "_", mc) } # set dirs -- GitLab From 4db43cb7d65773f4c27cf1df551c7a6f667dc1f3 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 21:12:21 +0100 Subject: [PATCH 22/24] minor changes in output file names --- .../functions/calculate_fluxes_with_goflux_package.R | 4 ++-- preprocessing/functions/combine_all_data_and_meta_data.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/preprocessing/functions/calculate_fluxes_with_goflux_package.R b/preprocessing/functions/calculate_fluxes_with_goflux_package.R index c30c091..a5c76df 100644 --- a/preprocessing/functions/calculate_fluxes_with_goflux_package.R +++ b/preprocessing/functions/calculate_fluxes_with_goflux_package.R @@ -33,8 +33,8 @@ setwd(mainDir) # save output file from flux 
calculations write.table(n2o_flux_result_df, - paste0(fluxResOutDir, "/", currentDateTime, "goflux_goflux_n2o_output__mc", measCampListString, ".csv"), + paste0(fluxResOutDir, "/", currentDateTime, "__goflux_goflux_n2o_output__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") write.table(goflux_best_n2o_flux_df, - paste0(fluxResOutDir, "/", currentDateTime, "goflux_bestflux_n2o_output__mc", measCampListString, ".csv"), + paste0(fluxResOutDir, "/", currentDateTime, "__goflux_bestflux_n2o_output__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ";", col.names = TRUE, na = "NA") diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 29e11e7..4ef6b64 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -99,7 +99,7 @@ for (goflux_row in 1:goflux_total_mp) { # write DF goflux_data_metadata_df write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata__mc", measCampListString, ".csv"), + paste0(fluxResOutDir, "/", currentDateTime, "__goflux_n2o_data_metadata__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ",") @@ -119,7 +119,7 @@ goflux_data_metadata_df$micro_habitat[which(goflux_data_metadata_df$micro_habita # write corrected DF goflux_data_metadata_df write.table(goflux_data_metadata_df, - paste0(fluxResOutDir, "/", currentDateTime, "_goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"), + paste0(fluxResOutDir, "/", currentDateTime, "__goflux_n2o_data_metadata_corrected__mc", measCampListString, ".csv"), row.names = FALSE, quote = FALSE, sep = ",") -- GitLab From 82f9a2acacae71aa8517045bfa284b7220abff85 Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 21:34:51 +0100 Subject: [PATCH 23/24] 
add measCamp and the name of the month of the measCamp to metaDataDF and flux output DF * not yet tested --- .../combine_all_data_and_meta_data.R | 4 ++++ preprocessing/main_data_preprocessing.R | 19 +++++++++++++------ preprocessing/run_all_main.R | 7 ++++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/preprocessing/functions/combine_all_data_and_meta_data.R b/preprocessing/functions/combine_all_data_and_meta_data.R index 4ef6b64..2162e48 100644 --- a/preprocessing/functions/combine_all_data_and_meta_data.R +++ b/preprocessing/functions/combine_all_data_and_meta_data.R @@ -23,6 +23,8 @@ goflux_data_metadata_df <- data.frame( # metaData datetime = as.POSIXct(rep("1000-01-01 00:00:00", goflux_total_mp), format = "%Y-%m-%d %H:%M:%S", tz="UTC" ), meas_ID = rep(NA, goflux_total_mp), + meas_camp = rep(NA, goflux_total_mp), + meas_camp_month = rep(NA, goflux_total_mp), plot_no = rep(NA, goflux_total_mp), micro_habitat = rep(NA, goflux_total_mp), light_dark = rep(NA, goflux_total_mp), @@ -72,6 +74,8 @@ for (goflux_row in 1:goflux_total_mp) { # measPeriodMetaDataDF << metaData goflux_data_metadata_df$datetime[goflux_row] <- measPeriodMetaDataDF$datetime_mp_first[rowNum_metaDataDF] # datetime from measurement start goflux_data_metadata_df$meas_ID[goflux_row] <- measPeriodMetaDataDF$meas_ID[rowNum_metaDataDF] + goflux_data_metadata_df$meas_camp[goflux_row] <- measPeriodMetaDataDF$meas_camp[rowNum_metaDataDF] + goflux_data_metadata_df$meas_camp_month[goflux_row] <- measPeriodMetaDataDF$meas_camp_month[rowNum_metaDataDF] goflux_data_metadata_df$plot_no[goflux_row] <- measPeriodMetaDataDF$plot_no[rowNum_metaDataDF] goflux_data_metadata_df$micro_habitat[goflux_row] <- measPeriodMetaDataDF$micro_habitat[rowNum_metaDataDF] goflux_data_metadata_df$light_dark[goflux_row] <- measPeriodMetaDataDF$light_dark[rowNum_metaDataDF] diff --git a/preprocessing/main_data_preprocessing.R b/preprocessing/main_data_preprocessing.R index 1142a75..2e3ef8d 100644 --- 
a/preprocessing/main_data_preprocessing.R +++ b/preprocessing/main_data_preprocessing.R @@ -25,6 +25,7 @@ f_main_data_preprocessing <- function(username, outDir, currentDateTime, measCamp, + measCampMonth, expYear){ # print info @@ -120,7 +121,13 @@ if (expYear == "2022"){ } -# 4 Plot GHG concentrations ---- +# 4 Add two new columns to metaDataDF with the measCamp and the month as name ---- + # e.g., "202305" and "May" + measPeriodMetaDataDF$meas_camp <- c(rep(measCamp, nrow(measPeriodMetaDataDF))) + measPeriodMetaDataDF$meas_camp_month <- c(rep(measCampMonth, nrow(measPeriodMetaDataDF))) + + +# 5 Plot GHG concentrations ---- # needed for data quality control during field campaign # output: concentration / time during chamber closure (measurement period) @@ -139,7 +146,7 @@ if (create_plots_ghg_conc == "T"){ } -# 5. Correct measurement data per measurement period and gas ---- +# 6. Correct measurement data per measurement period and gas ---- # info print("start correcting and filter measurement periods of N2O and associated environmental data") @@ -177,7 +184,7 @@ for (row in 1:nrow(measPeriodMetaDataDF)){ rownames(measPeriodN2oCorr) <- seq(1:nrow(measPeriodN2oCorr)) -# 5.1 Plot figures comparing data pre and post data-correction ---- +# 6.1 Plot figures comparing data pre and post data-correction ---- source("functions/plot_ghg_conc_pre_post_mp_correction.R") if (process_n2o == "T"){ @@ -189,7 +196,7 @@ if (process_n2o == "T"){ } -# 5.2 Add new column "time_since_mp_start" to DF ---- +# 6.2 Add new column "time_since_mp_start" to DF ---- # needed for flux calculation with goFlux package # run this only after the measPeriod data was modified / corrected ! @@ -208,7 +215,7 @@ if (process_n2o == "T"){ gas_ID$n2o_aeris) } -# 6. Sample gas concentration data for GC simulation ---- +# 7. 
Sample gas concentration data for GC simulation ---- if (simulateGCdata == "T"){ print("") print(">> start sampling data for GC simulation") @@ -290,7 +297,7 @@ if (simulateGCdata == "T"){ rownames(simGCsamplesDF) <- seq(1:nrow(simGCsamplesDF)) } -# 7. Create DF with only valid measurement periods ---- +# 8. Create DF with only valid measurement periods ---- print("") print(">> create DF with only valid measurement periods") diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index 6b1fe80..a5cf4ed 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -132,7 +132,11 @@ ppN2OdataDF <- data.frame() measPeriodMetaDataDF <- data.frame() # loop over measCampList -for (measCamp in measCampList){ +for (mc in 1:length(measCampList)){ + + # get measCamp YYYYMM and name of the month + measCamp <- measCampList[mc] + measCampMonth <- measCampMonthList[mc] # read raw data from Aeris output file if (measCamp == "202307"){ @@ -159,6 +163,7 @@ for (measCamp in measCampList){ outDir, currentDateTime, measCamp, + measCampMonth, expYear) # extract the two DF from list -- GitLab From e9e9e9f0e85d19b6f95e45702fd88d15d059933c Mon Sep 17 00:00:00 2001 From: Jan Engel MPI-BGC <jengel@bgc-jena.mpg.de> Date: Fri, 23 Feb 2024 22:07:12 +0100 Subject: [PATCH 24/24] minor improvement of docu --- preprocessing/run_all_main.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/run_all_main.R b/preprocessing/run_all_main.R index a5cf4ed..1ad420f 100644 --- a/preprocessing/run_all_main.R +++ b/preprocessing/run_all_main.R @@ -12,9 +12,9 @@ # 1. Script settings ---- -# list of measurement campaigns to run [YYYYMM] +# list of measurement campaigns to run [YYYYMM] and [month] measCampList <- c("202305", "202307", "202309") -measCampMonthList <- c("May", "July", "September") +measCampMonthList <- c("May", "July", "September") # must be consistent with 'measCampList' # gas type to process [T,F] process_co2 <- "F" -- GitLab