Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for cowidev.vax.incremental.guernsey
import pandas as pd
from cowidev.utils.clean import extract_clean_date , clean_count
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data , increment
[docs]
class Guernsey:
    """Incremental scraper for Guernsey's COVID-19 vaccination statistics page.

    The workflow is ``read`` -> ``pipeline`` -> ``increment`` and is driven by
    ``export``.
    """

    # Page that holds both the statistics table and the "last updated" note.
    source_url = "https://covid19.gov.gg/guidance/vaccine/stats"
    location = "Guernsey"
    # Captures the date text, e.g. "7 March 2022"; the group starts at the day number.
    _regex_date = r"This page was last updated on (\d{1,2} [A-Za-z]+ 202\d)"

    def read(self) -> pd.Series:
        """Download the source page and parse it.

        Returns:
            A Series with the entries ``date`` and ``total_vaccinations``.
        """
        soup = get_soup(self.source_url)
        return self.parse_data(soup)

    def parse_data(self, soup) -> pd.Series:
        """Extract the report date and the total dose count from the parsed page.

        Args:
            soup: Parsed page as returned by ``get_soup``; it must provide
                ``find_all`` and ``text`` (presumably a BeautifulSoup object --
                confirm against ``get_soup``).

        Returns:
            A Series with the entries ``date`` and ``total_vaccinations``.

        Raises:
            IndexError: If the page has no ``<table>`` or the table has no
                "Total doses" row.
        """
        # Local import keeps this block self-contained.
        from io import StringIO

        tables = soup.find_all("table")
        # Literal HTML strings are deprecated as ``read_html`` input (pandas >= 2.1),
        # so the markup is handed over as a file-like object instead.
        table = pd.read_html(StringIO(str(tables[0])))[0].squeeze()
        return pd.Series(
            {
                # The format must not start with whitespace: the regex group
                # begins directly at the day number.
                "date": extract_clean_date(
                    text=str(soup.text),
                    regex=self._regex_date,
                    date_format="%d %B %Y",
                    lang="en",
                ),
                "total_vaccinations": clean_count(self._total_doses_text(table)),
            }
        )

    @staticmethod
    def _total_doses_text(table) -> str:
        """Return the raw "Total doses" cell text with footnote asterisks removed.

        Args:
            table: Table whose column ``0`` holds row labels and column ``1``
                holds the matching values as strings.

        Raises:
            IndexError: If no row is labelled "Total doses".
        """
        is_total = table[0] == "Total doses"
        return table.loc[is_total, 1].values[0].replace("*", "")

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Pass ``ds`` through ``enrich_data`` with the ``location`` field."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Pass ``ds`` through ``enrich_data`` with the ``vaccine`` field."""
        return enrich_data(ds, "vaccine", "Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Pass ``ds`` through ``enrich_data`` with the ``source_url`` field."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply the location, vaccine and source steps, in that order."""
        return ds.pipe(self.pipe_location).pipe(self.pipe_vaccine).pipe(self.pipe_source)

    def export(self):
        """Read the latest data point, enrich it and hand it to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
[docs]
def main():
    """Build a ``Guernsey`` scraper and run its ``export`` step."""
    scraper = Guernsey()
    scraper.export()