# This script is made freely available under license CC-BY-4.0
#
# by Prof. Patrick E Meyer. Belgium. October 2023.
#
# Data extraction R script V2.
# Function that generates datasets using the parameters explained below.
#
# It produces the following datasets: data0to14, data15to44 and data45to64,
# each in .csv format.
#
# With 9 variables, namely
# ZscoreCurrent (i.e. period of interest), ZscorePast1Y (i.e. year before),
# ZscorePast2Y (i.e. two years before),
# Where (i.e. country), When (i.e. which period), Who (i.e. age-category),
# DoseRate (i.e. the sum of rates of the first 'nbDoses' during the periods
# investigated),
# CovDeathRate (avg 14 days ECDC covid death rate), CaseRate (avg 14 days ECDC
# covid case rate), both are independent of age category.
#
# Input parameters are
# dataFolder: address of the data sources to be used; file names are appended
#             directly to it, so it must end with a path separator
# lastYear: last year to consider in the study
# lastWeek: number of the last week
# studyLength: total number of weeks in the study
# groupOfWeeks: the number of weeks to group for each period
# countries: a vector of English names of countries
# nbDoses: the number of the n first doses to sum for the DoseRate variable
#          (between 1 and 7)
#
# Examples of use of this function are in the generatePreprintData() function
# below.
#
# References:
# Manuscript of 2022 data: All-cause Mortality During Covid-19 Vaccinations in
#   European Active Populations
# Manuscript of 2021 data: The impact of COVID-19 vaccines on all-cause
#   mortality in EU in 2021: a machine-learning perspective
generateDatasets <- function(dataFolder, lastYear, lastWeek, studyLength,
                             groupOfWeeks, countries, nbDoses) {
  momoGroups <- list("0to14", "15to44", "45to64")
  # Warning: few years shift with ecdc categories
  ecdcGroups <- list(
    c("Age0_4", "Age5_9", "Age10_14", "Age15_17", "Age<18"),
    c("Age18_24", "Age25_49"),
    c("Age50_59", "Age60_69")
  )
  zscoreFile <- "charts-z-scores-by-country"
  vaxFile <- "ECDC-Vax.csv"
  covidFile <- "ECDC-Rates.csv"

  # Column positions of the dose counts in the ECDC vaccination file.
  doseColumns <- c(6, 8:13)
  if (length(nbDoses) != 1L || is.na(nbDoses) ||
        nbDoses < 1 || nbDoses > length(doseColumns)) {
    stop("'nbDoses' must be a single number between 1 and ",
         length(doseColumns), call. = FALSE)
  }

  # All week/country look-ups below work on factor levels and factor codes.
  # read.csv() stopped converting strings to factors by default in R 4.0.0,
  # so the conversion is requested explicitly.
  readSource <- function(path, ...) {
    read.csv(path, ..., stringsAsFactors = TRUE)
  }

  ########################## Functions ##########################

  ###################### EuroMOMO functions #####################

  # Generate the vector of periods investigated in the study: the labels of
  # the weeks ending each group of 'groupOfWeeks' weeks, from the last week
  # backwards over 'studylength' weeks. Reads the 'euromomo' table currently
  # loaded by the main loop.
  generatePeriods <- function(lastyear, lastweek, groupOfWeeks, studylength) {
    weekLevels <- levels(euromomo$week)
    # 2021-8 should be 2021-08
    lastLabel <- sprintf("%d-%02d", as.integer(lastyear), as.integer(lastweek))
    endOfPeriod <- which(weekLevels == lastLabel)
    if (length(endOfPeriod) != 1L) {
      stop("week '", lastLabel, "' was not found in the EuroMOMO data",
           call. = FALSE)
    }
    startOfPeriod <- endOfPeriod + 1 - studylength
    startingPeriods <- seq(endOfPeriod, startOfPeriod, by = -groupOfWeeks)
    if (any(startingPeriods < 1)) {
      stop("the EuroMOMO data do not go back ", studylength,
           " weeks before week '", lastLabel, "'", call. = FALSE)
    }
    weekLevels[startingPeriods]
  }

  # Average z-score over the group of weeks ending at the given week in the
  # specified country.
  mymortality <- function(toYearWeek, groupOfWeeks, country) {
    endOfPeriod <- which(levels(euromomo$week) == toYearWeek)
    weekCode <- as.numeric(euromomo$week)
    where <- which(euromomo$country == country)
    when <- which(weekCode > (endOfPeriod - groupOfWeeks) &
                    weekCode <= endOfPeriod)
    index <- intersect(where, when)
    mean(euromomo$zscore[index])
  }

  # Average z-scores of every period of the study in one country.
  mortalitycountry <- function(country, periods, groupOfWeeks) {
    vapply(periods, mymortality, numeric(1), groupOfWeeks, country)
  }

  ################# ECDC vaccination functions ##################

  # doses = columns of the ECDC dataset to sum in the doses delivered,
  # in 2023: up to 7
  doses <- doseColumns[seq_len(nbDoses)]
  ISOcountries <- c("AT", "BE", "CY", "DK", "EE", "EL", "ES", "FI", "FR", "HU",
                    "IE", "IT", "IS", "LU", "MT", "NO", "PT", "SI", "SE")
  names(ISOcountries) <- c("Austria", "Belgium", "Cyprus", "Denmark",
                           "Estonia", "Greece", "Spain", "Finland", "France",
                           "Hungary", "Ireland", "Italy", "Israel",
                           "Luxembourg", "Malta", "Norway", "Portugal",
                           "Slovenia", "Sweden")

  # Rate of doses delivered during the group of weeks ending at a given week,
  # in a country, to a set of age groups, from ECDC data.
  myvax <- function(toYearWeek, groupOfWeeks, ISOcountry, ageGroups, doses) {
    # EuroMOMO labels look like "2021-08", ECDC ones like "2021-W08".
    isoWeek <- paste(strsplit(toYearWeek, "-")[[1]], collapse = "-W")
    endOfPeriod <- which(levels(vaxdata$YearWeekISO) == isoWeek)
    weekCode <- as.numeric(vaxdata$YearWeekISO)
    where <- which(vaxdata$Region == ISOcountry)
    when <- which(weekCode > (endOfPeriod - groupOfWeeks) &
                    weekCode <= endOfPeriod)
    target <- which(vaxdata$TargetGroup %in% ageGroups)
    index <- intersect(intersect(where, when), target)
    if (length(index) != 0) {
      res <- sum(vaxdata[index, doses])
      # To obtain a rate for the age groups injected; alternatively, for the
      # whole population the denominator should be
      # vaxdata$Population[vaxdata$Region==country][1]
      res / sum(unique(vaxdata$Denominator[index]))
    } else {
      print(paste(ISOcountry, "has no injection data for this age group in this period hence a 0 value will be used"))
      0
    }
  }

  # Rate of doses delivered for every period of the study in one country,
  # using ECDC data and a set of age groups.
  vaxcountry <- function(country, periods, groupOfWeeks, ageGroups, doses) {
    vapply(periods, myvax, numeric(1), groupOfWeeks, ISOcountries[country],
           ageGroups, doses)
  }

  ########## Functions for ECDC COVID-19 cases and deaths ########

  # Average 14-day rate of the given indicator over the group of weeks ending
  # at the given week in one country.
  mycaserate <- function(toYearWeek, groupOfWeeks, country, indic) {
    endOfPeriod <- which(levels(coviddata$year_week) == toYearWeek)
    weekCode <- as.numeric(coviddata$year_week)
    where <- which(coviddata$country == country)
    when <- which(weekCode > (endOfPeriod - groupOfWeeks) &
                    weekCode <= endOfPeriod)
    rate <- which(coviddata$indicator == indic)
    index <- intersect(intersect(where, when), rate)
    if (length(index) != 0) {
      res <- mean(coviddata$rate_14_day[index], na.rm = TRUE)
    } else {
      print(paste(country, "has no rate data for this age group in this period hence a 0 value will be used"))
      res <- 0
    }
    res
  }

  # Average rates of the given indicator for every period in one country.
  ratecountry <- function(country, periods, groupOfWeeks, indic) {
    vapply(periods, mycaserate, numeric(1), groupOfWeeks, country, indic)
  }

  # Apply a per-country function to every requested country and flatten the
  # periods-by-countries result country after country.
  perCountry <- function(fun, periodSet, ...) {
    as.vector(vapply(countries, fun, numeric(length(periodSet)),
                     periodSet, groupOfWeeks, ...))
  }

  ################ END of Function Definitions ##################

  ############## Beginning of datasets generation ###############
  for (ageGroup in seq_along(momoGroups)) {
    momoAges <- momoGroups[[ageGroup]]
    ecdcAges <- ecdcGroups[[ageGroup]]
    euromomo <- readSource(
      paste0(dataFolder, zscoreFile, "-", momoAges, ".csv"),
      sep = ";"
    )

    # computations independent of age groups (computed only the first time)
    if (ageGroup == 1) {
      vaxdata <- readSource(paste0(dataFolder, vaxFile))
      coviddata <- readSource(paste0(dataFolder, covidFile))
      periods <- generatePeriods(lastYear, lastWeek, groupOfWeeks, studyLength)
      past2Y <- generatePeriods(lastYear - 2, lastWeek, groupOfWeeks,
                                studyLength)
      past1Y <- generatePeriods(lastYear - 1, lastWeek, groupOfWeeks,
                                studyLength)
      caseRate <- perCountry(ratecountry, periods, "cases")
      covDeathRate <- perCountry(ratecountry, periods, "deaths")
    }

    ######################## 1. EuroMOMO ########################
    zscorePast2Y <- perCountry(mortalitycountry, past2Y)
    zscorePast1Y <- perCountry(mortalitycountry, past1Y)
    zscoreCurrent <- perCountry(mortalitycountry, periods)
    When <- periods
    Where <- rep(countries, each = length(When))
    Who <- rep(levels(euromomo$group), length(When))
    dataset <- data.frame(ZscoreCurrent = zscoreCurrent, When, Who, Where,
                          ZscorePast2Y = zscorePast2Y,
                          ZscorePast1Y = zscorePast1Y)

    ######################## 2. ECDC vax ########################
    doseRate <- perCountry(vaxcountry, periods, ecdcAges, doses)
    dataset <- data.frame(dataset, DoseRate = doseRate)

    ####################### 3. ECDC Covid #######################
    dataset <- data.frame(dataset, CaseRate = caseRate,
                          CovDeathRate = covDeathRate)
    write.csv(dataset, row.names = FALSE,
              file = paste0(dataFolder, "data", momoAges, ".csv"))
  }
  invisible(NULL)
}

# Generate the datasets with the parameters used in the preprints.
# dataFolder: folder holding the data sources (must end with a path separator)
# dataParameters: "2022" or "2021", selecting one of the two parameter sets
#                 below; any other value triggers a warning and generates
#                 nothing
generatePreprintData <- function(dataFolder = "~/Desktop/vax-data-7oct2023/",
                                 dataParameters = "2022") {
  if (dataParameters == "2022") {
    lastYear <- 2022 # last year to consider in the study
    # number of the last week, 2022-24 corresponds to ending of ECDC worldwide
    # rates tracking
    lastWeek <- 24
    # number of weeks in the study, 77 leads to last weeks of Dec 2020: that
    # is the beginning of injection campaigns
    studyLength <- 77
    groupOfWeeks <- 4 # data are grouped by periods of 4 weeks
    # EuroMOMO set of countries intersecting with ecdc data in 2022 data
    # sources.
    countries <- c("Austria", "Belgium", "Cyprus", "Denmark", "Estonia",
                   "Greece", "Spain", "Finland", "France", "Hungary",
                   "Ireland", "Italy", "Luxembourg", "Malta", "Portugal",
                   "Slovenia", "Sweden")
    nbDoses <- 4
    generateDatasets(dataFolder, lastYear, lastWeek, studyLength,
                     groupOfWeeks, countries, nbDoses)
  } else if (dataParameters == "2021") {
    # In the 2021 preprint, Israel and Norway were present in euromomo/ecdc
    # data.
    # Our computation of the variable DoseRate has been improved in v2 (age
    # groups and denominator are slightly different).
    # Nonetheless, statistical analyses of 2021 data are still holding on
    # updated datasets.
    lastYear <- 2021 # last year in the study
    lastWeek <- 46 # number of the last week of the generated data
    studyLength <- 52 # number of weeks in the study
    groupOfWeeks <- 4 # data are grouped by periods of 4 weeks
    # EuroMOMO set of countries intersecting with ecdc data in 2021 data
    # sources
    countries <- c("Austria", "Belgium", "Cyprus", "Denmark", "Estonia",
                   "Spain", "Finland", "France", "Hungary", "Ireland",
                   "Italy", "Israel", "Luxembourg", "Malta", "Norway",
                   "Portugal", "Slovenia", "Sweden")
    nbDoses <- 2
    generateDatasets(dataFolder, lastYear, lastWeek, studyLength,
                     groupOfWeeks, countries, nbDoses)
  } else {
    # Previously an unknown value silently did nothing.
    warning("unknown dataParameters '", dataParameters,
            "': expected \"2022\" or \"2021\"; no dataset was generated",
            call. = FALSE)
  }
  invisible(NULL)
}