Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for cowidev.testing.batch.germany
from datetime import datetime
import tempfile
import pandas as pd
import requests
from cowidev.testing import CountryTestBase
[docs]
def read_xlsx_from_url(url, **kwargs):
    """Download an Excel workbook over HTTP and load it into a DataFrame.

    The response body is parsed straight from memory, so no temporary file
    is created or left behind on disk.

    Args:
        url: Address of the workbook to download.
        **kwargs: Forwarded unchanged to ``pandas.read_excel`` (for example
            ``sheet_name``).

    Returns:
        The parsed data with every row that consists solely of missing
        values removed.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    from io import BytesIO

    # A browser-like User-Agent is sent with the request.
    # NOTE(review): presumably the server rejects the default `requests`
    # agent string -- confirm before removing.
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux i686)"}
    response = requests.get(url, headers=headers)
    # Fail here with a clear HTTP error instead of handing an error page to
    # the Excel parser.
    response.raise_for_status()
    df = pd.read_excel(BytesIO(response.content), **kwargs)
    return df.dropna(how="all")
[docs]
class Germany(CountryTestBase):
    """Testing-data scraper for Germany based on the RKI weekly workbook.

    ``read`` downloads the workbook and keeps the per-week rows,
    ``pipeline`` converts them to the output columns, and ``export`` chains
    both and hands the result to ``export_datafile``.
    """

    # Value written to the "Country" column.
    location: str = "Germany"
    # Workbook that is downloaded; also written to the "Source URL" column.
    source_url: str = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx?__blob=publicationFile"

    def read(self) -> pd.DataFrame:
        """Download the workbook and keep only the weekly rows.

        Returns:
            The rows of sheet ``1_Testzahlerfassung`` whose ``Kalenderwoche``
            value begins with ``<1-2 digit week>/<4 digit year>``.
        """
        df = read_xlsx_from_url(self.source_url, sheet_name="1_Testzahlerfassung")
        # na=False: empty or non-string cells make .str.match return NaN, and
        # a mask containing NaN cannot be used for boolean indexing.
        weekly_rows = df.Kalenderwoche.str.match(r"\d{1,2}/\d{4}", na=False)
        return df[weekly_rows]

    @staticmethod
    def _week_to_date(week_label: str) -> str:
        """Convert a ``"<ISO week>/<ISO year>"`` label to ``YYYY-MM-DD``.

        Weekday ``0`` (Sunday) is appended because ``%G``/``%V`` can only be
        parsed together with a weekday; the result is the Sunday of that
        ISO week.
        """
        parsed = datetime.strptime(week_label + " +0", "%V/%G +%w")
        return parsed.strftime("%Y-%m-%d")

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the weekly rows into the output table.

        Args:
            df: Frame with the columns ``Kalenderwoche``,
                ``Anzahl Testungen`` and ``Positivenanteil (%)``.

        Returns:
            A frame ordered by date with the columns ``Date``,
            ``Cumulative total``, ``Positive rate``, ``Source URL``,
            ``Source label``, ``Units``, ``Country`` and ``Notes``.
        """
        # Order the weeks chronologically *before* accumulating, so the
        # running total is correct even if the sheet rows are not in date
        # order. ISO-formatted date strings sort chronologically.
        df = df.assign(Date=df.Kalenderwoche.apply(self._week_to_date))
        df = df.sort_values("Date", kind="stable")
        df = df.assign(
            **{
                "Cumulative total": df["Anzahl Testungen"].cumsum(),
                # The sheet gives a percentage; store it as a fraction.
                "Positive rate": (df["Positivenanteil (%)"] / 100).round(3),
                "Source URL": self.source_url,
                "Source label": "Robert Koch Institut",
                "Units": "tests performed",
                "Country": self.location,
                "Notes": pd.NA,
            }
        )
        return df[
            [
                "Date",
                "Cumulative total",
                "Positive rate",
                "Source URL",
                "Source label",
                "Units",
                "Country",
                "Notes",
            ]
        ]

    def export(self):
        """Read the workbook, run the pipeline and export the result."""
        df = self.read().pipe(self.pipeline)
        # export_datafile is not defined in this class.
        # NOTE(review): presumably inherited from CountryTestBase -- confirm.
        self.export_datafile(df)
[docs]
def main():
    """Module entry point: build the Germany scraper and run its export."""
    scraper = Germany()
    scraper.export()