library(zoo)
library(lubridate)
library(readxl)
library(scales)
library(grid)
library(gridExtra)
library(janitor)
library(ggpubr)
library(cowplot)
library(patchwork)
library(ggthemes)
library(directlabels)
library(pdfetch)
library(gghighlight)
library(viridis)
library(tidyverse)
library(ggrepel)
library(xml2)
library(rvest)
options(dplyr.summarise.inform = FALSE)
data_fetch<-function(key, cat){
#key<-KEY
#cat=476336
ifelse(cat==999999999,
url <- paste("https://api.eia.gov/category/?api_key=",
key, "&out=xml", sep="" ),
url <- paste("https://api.eia.gov/category/?api_key=",
key, "&category_id=", cat, "&out=xml", sep="" )
)
x <- read_xml(url)
doc <- XML::xmlParse(file=x)
Parent_Category <- tryCatch(XML::xmlToDataFrame(,stringsAsFactors = F,nodes =
XML::getNodeSet(doc, "//category/parent_category_id")),
warning=function(w) FALSE, error=function(w) FALSE)
Sub_Categories <- XML::xmlToDataFrame(,stringsAsFactors = F,nodes =
XML::getNodeSet(doc, "//childcategories/row"))
Series_IDs <- XML::xmlToDataFrame(nodes =
XML::getNodeSet(doc, "///childseries/row"),stringsAsFactors = F)
Categories <- list(Parent_Category, Sub_Categories, Series_IDs)
names(Categories) <- c("Parent_Category", "Sub_Categories", "Series_IDs")
Categories
}
get_children<-function(category_id=476336){
subs<-data_fetch(KEY,cat=category_id)
sub_cats<-subs$Sub_Categories
#build list from sub_cats
cat_store <- list()
cat_count<-1
for (cat in sub_cats$category_id) {
#cat<-sub_cats$category_id[1]
series<-data_fetch(KEY,cat=cat)
cat_store[[cat_count]]<-series$Series_IDs
cat_count<-cat_count+1
}
data.frame(do.call(rbind,cat_store))
}
#get_children()
get_series<-function(category_id=476336){
#series,name,f,units,updated
subs<-data_fetch(KEY,cat=category_id)
subs$Series_IDs
}
#get_series()
pd_fix<-function(data,name){
data<-data.frame(date=index(data), coredata(data))
data$date<-ymd(data$date)
data <- setNames(data, c("date",name))
}
EIA_to_DF<-function(series_info){
data<- pdfetch_EIA(series_info$series_id,KEY)
pd_fix(data,series_info$name)
}
eia_aeo_comp<-function(start_year=2014,end_year=2022,api_series=".SUP_NA_LFL_NA_DCP_NA_USA_MILLBRLPDY.A",
label="US Total Crude Oil Production",
units="mmbbl/d",
history=FALSE,
hist_series="PET.MCRFPUS2.A",
hist_conversion=1,
hist_year=1950,
zero_y=TRUE
){ #use oil as the default
#testing
#api_series<-".GEN_NA_ELEP_NA_SLR_PHTVL_NA_BLNKWH.A"
#start_year=2015
#end_year=2022
#label="Oil Production"
#units="mmbbl/d"
#hist_series<-"PET.MCRFPUS2.A"
#hist_conversion=1000
series<-paste("AEO.",seq(start_year,end_year),".REF",seq(start_year,end_year),api_series,sep="")
labels<-paste(seq(start_year,end_year)," AEO",sep="")
elements=end_year-start_year+1
data<-pd_fix(pdfetch_EIA(series,KEY),labels)%>%
pivot_longer(-date,names_to = "variable")
plot<-ggplot(data)+
geom_line(aes(date,value,group=variable,color=variable,size=variable==paste(end_year,"AEO")),lty="31")+
#geom_point(data=data %>% filter(variable==paste(end_year,"AEO")),aes(date,value,group=variable,color=variable),size=2.25)+
scale_y_continuous(breaks=pretty_breaks(),expand=c(0,0))+
scale_x_date(breaks=pretty_breaks(n=10),expand=c(0,0))+
scale_color_viridis("",discrete = T,option="A",direction = -1,end = .9)+
scale_size_manual("",values=c(1,1.5))+
scale_linetype_manual("",values=c("solid"))+
theme_minimal()+weekly_graphs()+
theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))+
guides(linetype=guide_legend(order = 1,keywidth = unit(1.6,"cm")),
size="none",
#shape = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
#linetype = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
#colour = guide_legend(keywidth = unit(1.6,"cm"),override.aes = list(lty = "11") ,nrow = 2),
colour = guide_legend(keywidth = unit(1.6,"cm"),nrow = trunc(elements/6)+1,
override.aes = list(size=c(rep(1,elements-1),1.5))),
#fill = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2)
NULL)
if(zero_y)
plot<-plot+expand_limits(y=0)
#get historical data
if(history)
{
hist_data<-pd_fix(pdfetch_EIA(hist_series,KEY),"Historical data")%>%
pivot_longer(-date,names_to = "variable")%>%
mutate(value=value/hist_conversion)%>%
filter(year(date)>=hist_year)
plot<-plot+
geom_line(data=hist_data,aes(date,value,lty="Historical Data"),size=1)+
labs(y=paste(label," (",units,")",sep=""),x="",
title=paste("Historical",label,"and EIA AEO Reference Case Projections"),
caption="Source: Data via EIA AEO, graph by Andrew Leach.")
}
if(!history){
plot<-plot+
labs(y=paste(label," (",units,")",sep=""),x="",
title=paste("EIA Annual Energy Outlook",label,"Forecasts"),
caption="Source: Data via EIA AEO, graph by Andrew Leach.")
}
plot
}
Oil Production, Trade, and Prices
US Oil Production
eia_aeo_comp(history = TRUE,hist_conversion = 1000)
US Oil Imports
eia_aeo_comp(start_year = 2014,api_series =".TRAD_NA_LFL_NA_GIM_NA_USA_MILLBRLPDY.A",label = "US Gross Crude Oil Imports",
units = "million barrels per day",
history = TRUE,
hist_series = "PET.MCRIMUS2.A",
hist_conversion = 1000,
hist_year=1950)
US Oil Exports
eia_aeo_comp(start_year = 2014,api_series =".TRAD_NA_LFL_NA_EXP_NA_USA_MILLBRLPDY.A",label = "US Gross Crude Oil Exports",
units = "million barrels per day",
history = TRUE,
hist_series = "PET.MCREXUS2.A",
hist_conversion = 1000,
hist_year=2010)
WTI Oil Prices
eia_aeo_comp(start_year = 2015,api_series =".PRCE_NA_NA_NA_CR_WTI_USA_NDLRPBRL.A",label = "WTI (Cushing) Nominal Spot Price",
units = "$/bbl",
history = TRUE,
hist_series = "PET.RWTC.M",
hist_conversion = 1,
hist_year=1990)
Natural Gas Production, Trade, and
Prices
US Natural Gas Production
eia_aeo_comp(start_year = 2014,api_series =".SUP_DPR_NA_NA_NG_TOT_USA_TRLCF.A",label = "US Dry Natural Gas Production",
units = "TCF",
history = TRUE,
hist_series = "NG.N9070US2.M",
hist_conversion = 10^6/12,
hist_year=1950)
US Natural Gas Imports
eia_aeo_comp(start_year = 2020,api_series =".SUP_IMP_NA_NA_NG_NA_NA_TRLCF.A",label = "US Gross Natural Gas Imports",
units = "TCF",
history = TRUE,
hist_series = "NG.N9100US2.M",
hist_conversion = 10^6/12,
hist_year=1950)
US Natural Gas Exports
eia_aeo_comp(start_year = 2014,api_series =".SUP_EXPT_NA_NA_NG_NA_NA_TRLCF.A",label = "US Gross Natural Gas Exports",
units = "TCF",
history = TRUE,
hist_series = "NG.N9130US2.M",
hist_conversion = 10^6/12,
hist_year=2010)
Henry Hub Natural Gas Prices
eia_aeo_comp(start_year = 2014,api_series =".PRCE_HHP_NA_NA_NG_NA_USA_NDLRPMBTU.A",label = "Henry Hub Nominal Spot Price",
units = "$/MMBTU",
history = TRUE,
hist_series = "NG.RNGWHHD.M",
hist_conversion = 1,
hist_year=1990)
Trade Flows
#AEO Gas trade
export_set<-c("Exports : Pipeline Exports to Canada",
"Exports : Pipeline Exports to Mexico",
"Exports : Liquefied Natural Gas Exports")
import_set<-c("Imports : Pipeline Imports from Canada",
"Imports : Pipeline Imports from Mexico",
"Imports : Liquefied Natural Gas Imports")
#imports by data_series
#http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=476336
import_series<-get_children(476336)
import_series<-filter(import_series,grepl("U.S. Natural Gas Pipeline Imports From",name)|grepl("U.S. Liquefied Natural Gas Imports,",name),!grepl("Price",name),!grepl("Annual",name))
#exports by data series
#http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=476803
export_series<-get_children(476802)
export_series<-filter(export_series,grepl("U.S. Natural Gas Pipeline Exports to",name)|grepl("Liquefied U.S. Natural Gas Exports,",name),!grepl("Price",name),!grepl("Annual",name))
gas_trade<-rbind(import_series,export_series)
gas_trade_data<-EIA_to_DF(gas_trade)
#reset to match AEO Names
names(gas_trade_data)<-c("date",
"Imports : Pipeline Imports from Canada",
"Imports : Pipeline Imports from Mexico",
"Imports : Liquefied Natural Gas Imports",
"Exports : Pipeline Exports to Canada",
"Exports : Pipeline Exports to Mexico",
"Exports : Liquefied Natural Gas Exports")
#gas_trade_data$`Imports : Pipeline Imports`<-gas_trade_data$`Imports : Pipeline Imports from Canada`+gas_trade_data$`Imports : Pipeline Imports from Mexico`
#gas_trade_data$`Exports : Pipeline Exports`<-gas_trade_data$`Exports : Pipeline Exports to Canada`+gas_trade_data$`Exports : Pipeline Exports to Mexico`
#gas_trade_data$`Pipeline Net Imports`<-gas_trade_data$`Imports : Pipeline Imports`- gas_trade_data$`Exports : Pipeline Exports`
#gas_trade_data$`Liquefied Natural Gas Net Imports`<-gas_trade_data$`Imports : Liquefied Natural Gas Imports` - gas_trade_data$`Exports : Liquefied Natural Gas Exports`
gas_trade_data<-gas_trade_data %>% pivot_longer(-date,names_to ="series")
#make it annual
gas_trade_data<-gas_trade_data %>% mutate(year=year(date)) %>% group_by(year,series) %>%
summarise(value=12*mean(value,na.rm = T)) %>% ungroup %>% #now annual values based on mean non-na month
mutate(date=ymd(paste(year,12,31,sep = "-")),year=NULL)
#adjust to TCF per year
gas_trade_data$value<-gas_trade_data$value/10^6
history_dates<-tibble(date=unique(gas_trade_data$date))
gas_trade_data<-gas_trade_data %>% filter(series %in% import_set | series %in% export_set) %>%
mutate(value=ifelse(series %in% import_set,-1*value,value))
#build AEO list
subs<-data_fetch(KEY,cat=3162260)
#AEO_data$test<-"SUP_IMP_LIQ_NA_NG_NA_NA_TRLCF.A"
AEO_data<-subs$Series_IDs
AEO_data<-NULL
for(j in seq(2014,2019)){
#print(paste("working on AEO",j,sep=""))
work_data<-subs$Series_IDs
work_data$series_id<-gsub("2019",j,work_data$series_id)
work_data$name<-gsub("2019",j,work_data$name)
AEO_data<-rbind(AEO_data,work_data)
}
#get the 2022 defns
subs<-data_fetch(KEY,cat=4442481)
for(j in seq(2020,2022)){
#print(paste("working on AEO",j,sep=""))
work_data<-subs$Series_IDs
work_data$series_id<-gsub("2022",j,work_data$series_id)
work_data$name<-gsub("2022",j,work_data$name)
AEO_data<-rbind(AEO_data,work_data)
}
AEO_data$name<-gsub("Natural Gas : Volumes : ","",AEO_data$name)
test<- pdfetch_EIA(AEO_data$series_id,KEY)
series_data<-data.frame(date=index(test), coredata(test))
series_data$date<-ymd(series_data$date)
series_data <- setNames(series_data, c("date",AEO_data$name))
suppressMessages(series_data <-series_data %>% full_join(history_dates))#include the dates for which we have history. They will be NA now, but we'll stack them in later
#melt it
series_data<-series_data %>% pivot_longer(-c(date),names_to="variable") #%>% na.omit()
#series_data$`Pipeline Net Imports`<-series_data$`Imports : Pipeline Imports`- gas_trade_data$`Exports : Pipeline Exports`
#series_data$`Liquefied Natural Gas Net Imports`<-gas_trade_data$`Imports : Liquefied Natural Gas Imports` - gas_trade_data$`Exports : Liquefied Natural Gas Exports`
#get the year for each
name_split<-do.call(rbind,strsplit(as.character(series_data$variable),", "))
series_data$series<-name_split[,1]
#series_data$case<-name_split[,2]
series_data$aeo_year<-name_split[,3]
suppressMessages(joint_data<-series_data%>%filter(year(date)>=2000)%>%left_join(gas_trade_data%>%rename(history=value)))
joint_data<-joint_data %>% filter(series %in% import_set | series %in% export_set) %>%
mutate(value=ifelse(series %in% import_set,-1*value,value),
#history=ifelse(series %in% import_set,-1*history,history)
NULL)
#strip out history after forecast date
joint_data<-joint_data %>%
mutate(year=as.numeric(gsub("AEO","",aeo_year)),
history=ifelse(year(date)<as.numeric(gsub("AEO","",aeo_year)),history,NA),
series=gsub("Exports : ","",series),
series=gsub("Imports : ","",series),
series=factor(series,levels=c("Liquefied Natural Gas Exports", "Pipeline Exports to Canada", "Pipeline Exports to Mexico",
"Liquefied Natural Gas Imports", "Pipeline Imports from Canada" ,"Pipeline Imports from Mexico" ))
)
gas_trade_plot<-ggplot(joint_data)+
geom_area(aes(date,value,fill=series),position="stack",alpha=0.6,color="black",size=0.25)+
geom_area(aes(date,history,fill=series),position="stack",color="black",size=0.25)+
facet_wrap(~aeo_year,nrow = 1)+
scale_y_continuous(breaks=pretty_breaks(),expand=c(0,0))+
scale_x_date(breaks=pretty_breaks(n=5),expand=c(0,0))+
#scale_color_viridis("",discrete = T,option="A",direction = -1,end = .9)+
scale_fill_viridis("",discrete = T,option="A",direction = -1,end = .9)+
scale_size_manual("",values=c(1,1.5))+
scale_linetype_manual("",values=c("solid"))+
expand_limits(x=ymd("1999-10-01"))+
theme_minimal()+weekly_graphs()+
theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))+
theme(axis.text.x = element_text(angle=90,hjust = 0.5,vjust = 0.5))+
guides(linetype=guide_legend(order = 1,keywidth = unit(1.6,"cm")),
size="none",
#shape = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
#linetype = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
#colour = guide_legend(keywidth = unit(1.6,"cm"),override.aes = list(lty = "11") ,nrow = 2),
fill = guide_legend(keywidth = unit(1,"cm"),nrow = 2,byrow = TRUE),
#fill = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2)
NULL)+
labs(y=paste("Annual Net Outflows (TCF)",sep=""),x="",
title=paste("EIA Annual Energy Outlook Natural Gas Trade Projections"),
subtitle=paste("Historical data up to forecast date in darker shade, forecasts shown with more transparency"),
caption="Source: Data via EIA, graph by Andrew Leach.")
gas_trade_plot
Electricity supply
Total Electricity Supply
eia_aeo_comp(start_year = 2015,api_series =".GEN_NA_ELEP_NA_TEG_NA_USA_BLNKWH.A",label = "Electricity Generation",
units = "billion kWh",
history = TRUE,
hist_conversion = 1000,
hist_series = "ELEC.GEN.ALL-US-99.A"
)
Solar Generation
eia_aeo_comp(start_year = 2014,api_series =".GEN_NA_ALLS_NA_SLR_NA_NA_BLNKWH.A",label = "Solar Electricity Generation",
units = "billion kWh",
history = TRUE,
hist_conversion = 1000,
hist_series = "ELEC.GEN.TSN-US-99.A"
)
Coal Generation
eia_aeo_comp(start_year = 2018,api_series =".GEN_NA_ELEP_POW_CL_NA_USA_BLNKWH.A",label = "Coal-fired Electricity Generation",
units = "billion kWh",
history = TRUE,
hist_conversion = 1000,
hist_series = "ELEC.GEN.COW-US-98.A")