Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for cowidev.vax.incremental.serbia
import re
from bs4 import BeautifulSoup
import pandas as pd
from cowidev.utils.clean import clean_count , clean_date
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data , increment
[docs]
class Serbia:
    """Scraper for the vaccination figures published for Serbia.

    Downloads the page at ``source_url``, pulls the dose counters and the
    "updated" date out of it with regular expressions, and hands the result
    to ``increment``.
    """

    def __init__(self):
        """Set the location name, the source URL and the parsing patterns."""
        self.location = "Serbia"
        self.source_url = "https://vakcinacija.gov.rs/"
        self.regex = {
            # Capture groups, in order: total doses, first dose, second dose,
            # third dose. Numbers on the page use "." as a digit separator.
            "metrics": (
                r"Број доза: ([\d\.]+)\s?(?:–|-) прва доза ([\d\.]+), друга доза ([\d\.]+), трећа доза ([\d\.]+)"
            ),
            # Text starting with the word "ажурирано" ("updated").
            "date": r"ажурирано .*",
        }

    def read(self) -> pd.Series:
        """Fetch the source page and return the parsed data point.

        Returns:
            A series with ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated``, ``total_boosters``, ``source_url``
            and ``date``.

        Raises:
            ValueError: If the metrics sentence or the date paragraph cannot
                be located unambiguously in the page.
        """
        soup = get_soup(self.source_url)
        (
            total_vaccinations,
            people_vaccinated,
            people_fully_vaccinated,
            total_boosters,
        ) = self._parse_metrics(soup)
        return pd.Series(
            {
                "total_vaccinations": total_vaccinations,
                "people_vaccinated": people_vaccinated,
                "people_fully_vaccinated": people_fully_vaccinated,
                "total_boosters": total_boosters,
                "source_url": self.source_url,
                "date": self._parse_date(soup),
            }
        )

    def _parse_metrics(self, soup: "BeautifulSoup"):
        """Extract the four dose counters from the page text.

        Returns:
            A tuple ``(total_vaccinations, people_vaccinated,
            people_fully_vaccinated, total_boosters)``, each passed through
            ``clean_count``.

        Raises:
            ValueError: If the metrics pattern is not found in the page text.
        """
        match = re.search(self.regex["metrics"], soup.text)
        if match is None:
            # Fail with the same explicit error used for the date instead of
            # an AttributeError on ``None.group``.
            raise ValueError("Format of source has changed")
        total_vaccinations = clean_count(match.group(1))
        people_vaccinated = clean_count(match.group(2))
        people_fully_vaccinated = clean_count(match.group(3))
        total_boosters = clean_count(match.group(4))
        return total_vaccinations, people_vaccinated, people_fully_vaccinated, total_boosters

    def _parse_date(self, soup: "BeautifulSoup") -> str:
        """Extract the "updated" date from the page.

        Looks for ``<p>`` elements containing text that matches the date
        pattern and requires exactly one such element.

        Returns:
            The value produced by ``clean_date`` for that paragraph's text.

        Raises:
            ValueError: If zero or more than one paragraph matches.
        """
        date_pattern = re.compile(self.regex["date"])
        candidates = [
            paragraph
            for paragraph in soup.find_all("p")
            if paragraph.find(text=date_pattern)
        ]
        # Exactly one paragraph must match; an empty result previously fell
        # through to an IndexError on ``x[0]``.
        if len(candidates) != 1:
            raise ValueError("Format of source has changed")
        # NOTE(review): the format has a space between "%d" and ".%m" --
        # confirm it matches the text actually published on the page.
        return clean_date(candidates[0].text.strip(), "ажурирано %d .%m.%Y")

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Add the ``location`` field to the data point via ``enrich_data``."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field to the data point via ``enrich_data``."""
        return enrich_data(
            ds,
            "vaccine",
            "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V",
        )

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply the vaccine and location enrichment steps, in that order."""
        return ds.pipe(self.pipe_vaccine).pipe(self.pipe_location)

    def export(self):
        """Read the source, run the pipeline and pass the result to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
[docs]
def main():
    """Module entry point: build a ``Serbia`` scraper and run its export."""
    scraper = Serbia()
    scraper.export()