Code Monkey home page Code Monkey logo

clean_national_covid_data's Introduction

Clean_national_covid_data

import pandas as pd
import re
import requests
from bs4 import BeautifulSoup

# Turn the comma-separated text of each column into a list.
# NOTE(review): `data` is not assigned anywhere above this point in the
# visible file; it has to be loaded before these lines run.

for column in ('qz', 'wzz'):
    data[column] = data[column].apply(lambda text: text.split(','))

#clean data

def clean(data):
    """Split report fragments into a place/count table.

    For each element of ``data`` the first run of CJK characters is taken
    as the place name and the first run of digits as the count.

    Args:
        data: Iterable of strings, e.g. one cell of the ``qz``/``wzz``
            column after it has been split on commas.

    Returns:
        pandas.DataFrame with the columns ``place`` (str) and ``num``
        (int), one row per fragment that contains both a place and a
        number. Fragments missing either part (such as the empty string
        left behind by a trailing comma) are skipped instead of raising
        IndexError.
    """
    digit_re = re.compile(r'\d+')
    place_re = re.compile(r'[\u4e00-\u9fa5]+')

    places = []
    counts = []
    for fragment in data:
        number = digit_re.search(fragment)
        name = place_re.search(fragment)
        if number is None or name is None:
            continue
        places.append(name.group())
        # Store real integers so that the fillna(0) / sum steps applied to
        # the merged tables do not mix str and int values.
        counts.append(int(number.group()))

    return pd.DataFrame({'place': places, 'num': counts})
    
def gather(data, column='qz'):
    """Merge the per-row place/count tables of one column into a wide table.

    Every row of ``data[column]`` is parsed with ``clean`` and the
    resulting tables are outer-joined on ``place``.

    Args:
        data: Table whose ``column`` holds, per row, a list of text
            fragments accepted by ``clean``.
        column: Name of the column to aggregate. Defaults to ``'qz'``.

    Returns:
        pandas.DataFrame with a ``place`` column followed by one count
        column per input row, named ``num_0``, ``num_1``, ... in row order.
        Places that do not occur in a row get 0 there. An input without
        rows yields an empty frame that only has the ``place`` column.
    """
    result = None
    for position, fragments in enumerate(data[column]):
        # Give every row its own column name. When all frames carry the
        # same "num" column, pandas re-uses the _x/_y suffixes and the
        # fourth merge produces a second "num_x", which recent pandas
        # versions reject with MergeError.
        daily = clean(fragments).rename(
            columns={'num': 'num_{}'.format(position)}
        )
        if result is None:
            result = daily
        else:
            result = pd.merge(result, daily, on='place', how='outer')

    if result is None:
        return pd.DataFrame({'place': []})
    return result.fillna(0)
def gather2(data, column='wzz'):
    """Merge the per-row place/count tables of one column into a wide table.

    Every row of ``data[column]`` is parsed with ``clean`` and the
    resulting tables are outer-joined on ``place``.

    Args:
        data: Table whose ``column`` holds, per row, a list of text
            fragments accepted by ``clean``.
        column: Name of the column to aggregate. Defaults to ``'wzz'``.

    Returns:
        pandas.DataFrame with a ``place`` column followed by one count
        column per input row, named ``num_0``, ``num_1``, ... in row order.
        Places that do not occur in a row get 0 there. An input without
        rows yields an empty frame that only has the ``place`` column.
    """
    result2 = None
    for position, fragments in enumerate(data[column]):
        # Give every row its own column name. When all frames carry the
        # same "num" column, pandas re-uses the _x/_y suffixes and the
        # fourth merge produces a second "num_x", which recent pandas
        # versions reject with MergeError.
        daily = clean(fragments).rename(
            columns={'num': 'num_{}'.format(position)}
        )
        if result2 is None:
            result2 = daily
        else:
            result2 = pd.merge(result2, daily, on='place', how='outer')

    if result2 is None:
        return pd.DataFrame({'place': []})
    return result2.fillna(0)
# Build the wide per-place tables for the two columns
# (presumably qz = confirmed and wzz = asymptomatic -- confirm).
qz_result, wzz_result = gather(data), gather2(data)

# Read the two workbooks back in.
# NOTE(review): nothing in the visible file writes qz.xlsx / wzz.xlsx;
# they are expected to exist already.

sta1, sta2 = [pd.read_excel(path) for path in ('qz.xlsx', 'wzz.xlsx')]

# Compute the infection total, i.e. asymptomatic + confirmed:
# stack both tables, then sum the rows that share a place.

sta3 = pd.concat([sta1, sta2]).pipe(
    lambda frame: frame.groupby(frame.place).sum().reset_index()
)
sta3.to_excel('all.xlsx')

# Look only at the data of a single day

import pandas as pd
import re
# Load the workbook and turn the comma-separated text of both columns
# into lists.
data = pd.read_excel('covid.xlsx')
for column in ('qz', 'wzz'):
    data[column] = data[column].apply(lambda text: text.split(','))

def clean(data):
    """Split report fragments into a place/count table.

    For each element of ``data`` the first run of CJK characters is taken
    as the place name and the first run of digits as the count.

    Args:
        data: Iterable of strings, e.g. one cell of the ``qz``/``wzz``
            column after it has been split on commas.

    Returns:
        pandas.DataFrame with the columns ``place`` (str) and ``num``
        (int), one row per fragment that contains both a place and a
        number. Fragments missing either part (such as the empty string
        left behind by a trailing comma) are skipped instead of raising
        IndexError.
    """
    digit_re = re.compile(r'\d+')
    place_re = re.compile(r'[\u4e00-\u9fa5]+')

    places = []
    counts = []
    for fragment in data:
        number = digit_re.search(fragment)
        name = place_re.search(fragment)
        if number is None or name is None:
            continue
        places.append(name.group())
        # Store real integers so that the fillna(0) / sum steps applied to
        # the merged tables do not mix str and int values.
        counts.append(int(number.group()))

    return pd.DataFrame({'place': places, 'num': counts})
    
# Parse only the entry labelled 0 of each column, then outer-join the two
# tables on place; a place missing from one side gets 0.
qz2, wzz2 = clean(data['qz'][0]), clean(data['wzz'][0])
result_qz = qz2.merge(wzz2, on='place', how='outer').fillna(0)

# Data for each district of Shanghai

def count(shuzi, column='4/28'):
    """Count cases per district from free-text case descriptions.

    Each non-null entry is split on ``,``: the first field carries the case
    number(s) and the second the residence, from which the literal prefix
    ``居住于`` is removed to obtain the district. A first field containing
    exactly two numbers is treated as an inclusive range
    (``second - first + 1`` cases); anything else counts as one case.

    Args:
        shuzi: pandas Series of description strings; missing values are
            dropped.
        column: Name of the resulting count column. Defaults to
            ``'4/28'``, the label that used to be hard-coded.

    Returns:
        pandas.DataFrame indexed by ``qu`` (district) with a single column
        named ``column`` that holds the summed case counts.

    Raises:
        IndexError: If an entry contains no ``,`` separator.
    """
    # Raw string: a plain '\d+' literal is an invalid escape sequence.
    number_re = re.compile(r'\d+')

    districts = []
    cases = []
    for entry in shuzi.dropna():
        fields = entry.split(',')
        districts.append(fields[1].replace('居住于', ''))

        numbers = number_re.findall(fields[0])
        if len(numbers) == 2:
            # Two numbers describe a range of consecutive case ids.
            cases.append(int(numbers[1]) - int(numbers[0]) + 1)
        else:
            cases.append(1)

    per_entry = pd.DataFrame({'qu': districts, column: cases})
    return pd.DataFrame(per_entry.groupby('qu')[column].sum())

# Per-district counts for the q1 and w1 columns, outer-joined on district;
# a district missing from one side gets 0.
# NOTE(review): `data3` is not defined in the visible part of the file.
que_1, wu_1 = count(data3['q1']), count(data3['w1'])
data_result = que_1.merge(wu_1, on='qu', how='outer').fillna(0)

clean_national_covid_data's People

Contributors

lianshawn avatar

Watchers

James Cloos avatar  avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, making everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.