百木园-与人分享,
就是让自己快乐。

Python采集1000多所世界大学排名数据,制作可视化图

前言

QS世界大学排名(QS World University Rankings)是由英国一家国际教育市场咨询公司Quacquarelli Symonds(简称QS)所发表的年度世界大学排名。

采集全球大学排名数据(源码已分享,求点赞)

import requests     # 发送请求
import re
import csv

# Make sure rank.csv exists and starts with the column header.
# The file is opened in append mode so rows collected by earlier runs are
# preserved; the header is written only while the file is still empty,
# otherwise every re-run would inject another header line into the data.
with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    # Append mode positions the stream at end-of-file, so 0 means "empty".
    if f.tell() == 0:
        csv_writer.writerow([
            'country', 'rank', 'region',
            'score_1', 'score_2', 'score_3', 'score_4', 'score_5', 'score_6',
            'total_score', 'stars', 'university', 'year',
        ])
# Matches the two nested wrapper <div>s around a table cell and captures
# the text between them. Compiled once because replace() is called for
# several fields of every record.
_TD_WRAP_PATTERN = re.compile(
    '<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>'
)


def replace(str_):
    """Return the inner text of the first ``td-wrap`` cell found in *str_*.

    Args:
        str_: HTML fragment of the form
            ``<div class="td-wrap"><div class="td-wrap-in">TEXT</div></div>``.

    Returns:
        The captured ``TEXT`` of the first match (may be an empty string).

    Raises:
        IndexError: If *str_* contains no such wrapper.
    """
    return _TD_WRAP_PATTERN.findall(str_)[0]
# Endpoint whose response body is decoded as JSON below.
url = 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2057712_indicators.txt'

# 1. Send the request.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops early on an HTTP error instead of trying to
# parse an error page as JSON.
response = requests.get(url, timeout=30)
response.raise_for_status()

# 2. Decode the body into a Python dict.
json_data = response.json()

# 3. Parse the records stored under the 'data' key.
data_list = json_data['data']

# The university name is wrapped in a link inside the usual td-wrap cell;
# compile the pattern once instead of once per record.
uni_pattern = re.compile(
    '<div class="td-wrap"><div class="td-wrap-in">'
    '<a href=".*?" class="uni-link">(.*?)</a></div></div>'
)

# Open the output file a single time and append every row through the same
# writer, rather than reopening rank.csv for each record.
with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for i in data_list:
        country = i['location']                 # country / territory
        rank = i['overall_rank']                # rank
        region = i['region']                    # continent
        score_1 = replace(i['ind_76'])          # academic reputation
        score_2 = replace(i['ind_77'])          # employer reputation
        score_3 = replace(i['ind_36'])          # faculty/student ratio
        score_4 = replace(i['ind_73'])          # citations per faculty
        score_5 = replace(i['ind_18'])          # international faculty
        score_6 = replace(i['ind_14'])          # international students
        total_score = replace(i['overall'])     # overall score
        stars = i['stars']                      # star rating
        uni = i['uni']                          # raw HTML holding the name
        university = uni_pattern.findall(uni)[0]
        year = "2021"                           # ranking year
        print(country, rank, region, score_1, score_2, score_3, score_4,
              score_5, score_6, total_score, stars, university, year)
        csv_writer.writerow([country, rank, region, score_1, score_2, score_3,
                             score_4, score_5, score_6, total_score, stars,
                             university, year])

 


可视化展示

导入所需模块

from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.components import Table
import re
import pandas as pd

 

导入数据

# Load the scraped ranking table.
df = pd.read_csv('rank.csv')

# Hong Kong, Macau, Taiwan and mainland China appear as separate entries in
# the ranking; they are all grouped under 'China' here. The original value is
# kept in 'loc' before 'country' is overwritten.
df['loc'] = df['country']
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the in-place form acts on an intermediate object and is
# not guaranteed to update df (it does not under pandas Copy-on-Write).
df['country'] = df['country'].replace(
    ['China (Mainland)', 'Hong Kong SAR', 'Taiwan', 'Macau SAR'], 'China')

 

2021年世界大学排名(QS) TOP 100

# Horizontal bar chart for the "TOP 100" section.
# NOTE(review): `university` and `score` are not defined anywhere in the
# visible code -- they are presumably the name list and the matching score
# list prepared from `df`; define them before running this cell.
bar = (
    Bar()
    .add_xaxis(university)
    .add_yaxis('', score, category_gap='30%')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="2021年世界大学排名(QS) TOP 100",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
        # Vertical zoom slider, initially showing the 70%-100% window.
        datazoom_opts=opts.DataZoomOpts(range_start=70, range_end=100, orient='vertical'),
        # Hidden colour gradient over the 60-100 value range.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=100, min_=60, dimension=0,
            range_color=['#00FFFF', '#FF7F50']),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
        yaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(font_size=12)))
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True,
                                  position='right',
                                  font_style='italic'),
        # Rounded bars with a soft drop shadow.
        itemstyle_opts={"normal": {
            "barBorderRadius": [30, 30, 30, 30],
            'shadowBlur': 10,
            'shadowColor': 'rgba(120, 36, 50, 0.5)',
            'shadowOffsetY': 5,
        }})
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
)

# Wrap the chart in a Grid to control the canvas size and side margins.
grid = (
    Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
    .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
)
grid.render_notebook()

 

TOP 500中的中国大学

# Horizontal bar chart for the "Chinese universities in the TOP 500" section.
# NOTE(review): `university` and `score` are not defined anywhere in the
# visible code -- they are presumably the name list and the matching score
# list prepared from `df` for this section; define them before running.
bar = (
    Bar()
    .add_xaxis(university)
    .add_yaxis('', score, category_gap='30%')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="TOP 500中的中国大学",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
        # Vertical zoom slider, initially showing the 50%-100% window.
        datazoom_opts=opts.DataZoomOpts(range_start=50, range_end=100, orient='vertical'),
        # Hidden colour gradient over the 20-90 value range.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=90, min_=20, dimension=0,
            range_color=['#00FFFF', '#FF7F50']),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
        yaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(font_size=12)))
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True,
                                  position='right',
                                  font_style='italic'),
        # Rounded bars with a soft drop shadow.
        itemstyle_opts={"normal": {
            "barBorderRadius": [30, 30, 30, 30],
            'shadowBlur': 10,
            'shadowColor': 'rgba(120, 36, 50, 0.5)',
            'shadowOffsetY': 5,
        }})
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
)

# Wrap the chart in a Grid to control the canvas size and side margins.
grid = (
    Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
    .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
)
grid.render_notebook()

 

TOP 1000高校按大洲分布

# Count the 2021 entries ranked within the top 1000 per continent
# ('region' column), largest group first.
t_data = df[(df.year == 2021) & (df['rank'] <= 1000)]
t_data = t_data.groupby(['region'])['university'].count().reset_index()
t_data.columns = ['region', 'num']
t_data = t_data.sort_values(by="num", ascending=False)


# Vertical bar chart: one bar per continent, labelled with its count.
bar = (
    Bar(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='600px'))
    .add_xaxis(t_data['region'].tolist())
    .add_yaxis('出现次数', t_data['num'].tolist(), category_gap='50%')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="TOP 1000高校按大洲分布",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
        # Hidden colour gradient over the 0-300 range.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=300, min_=0, dimension=1,
            range_color=['#00FFFF', '#FF7F50']),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(font_size=15)),
        yaxis_opts=opts.AxisOpts(is_show=False))
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True,
                                  position='top',
                                  font_size=15,
                                  font_style='italic'),
        # Rounded bars with a soft drop shadow.
        itemstyle_opts={"normal": {
            "barBorderRadius": [30, 30, 30, 30],
            'shadowBlur': 10,
            'shadowColor': 'rgba(120, 36, 50, 0.5)',
            'shadowOffsetY': 5,
        }})
)

bar.render_notebook()

 

TOP 1000高校按国家分布

# JavaScript label formatter for the Geo layer: "<name>: <value[2]>".
fmt_js = """function (params) {return params.name+': '+Number(params.value[2]);}"""

# NOTE(review): `data_pair`, `country_list` and `loc` are not defined anywhere
# in the visible code. From their use below, `data_pair` is iterated as
# (name, value) pairs and `loc` maps a name to a pair of coordinates;
# define all three before running this cell.

# Layer 1: world map coloured by the values in data_pair.
mp = Map()
mp.add(
    "高校数量",
    data_pair,
    "world",
    is_map_symbol_show=False,
    is_roam=False)

mp.set_series_opts(
    label_opts=opts.LabelOpts(is_show=False),
    itemstyle_opts={'normal': {
        'areaColor': '#191970',
        'borderColor': '#1773c3',
        'shadowColor': '#1773c3',
        'shadowBlur': 20,
        'opacity': 0.8,
    }})

mp.set_global_opts(
    title_opts=opts.TitleOpts(title="TOP 1000高校按国家分布", pos_left='center',
                              title_textstyle_opts=opts.TextStyleOpts(font_size=18)),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        max_=100,
        is_piecewise=False,
        dimension=0,
        range_color=['rgba(255,228,225,0.6)', 'rgba(255,0,0,0.9)', 'rgba(255,0,0,1)'])
)

# Layer 2: text labels, only for the entries whose name is in country_list.
data_pair = [[x, y] for x, y in data_pair if x in country_list]
geo = Geo()

# The coordinates of these entries must be registered with geo first.
for k, v in loc.items():
    geo.add_coordinate(k, v[0], v[1])
# The geo base map is made fully transparent (opacity 0).
geo.add_schema(maptype="world", is_roam=False, itemstyle_opts={'normal': {'opacity': 0}})

geo.add("", data_pair, symbol_size=1)
# Label display settings.
geo.set_series_opts(
    label_opts=opts.LabelOpts(
        is_show=True,
        position='right',
        color='white',
        font_size=12,
        font_weight='bold',
        formatter=JsCode(fmt_js)),
)

# Combine both layers in one Grid.
grid = (
    Grid(init_opts=opts.InitOpts(theme='chalk', width='1000px', height='600px'))
    .add(mp, grid_opts=opts.GridOpts(pos_top="12%"))
    .add(geo, grid_opts=opts.GridOpts(pos_bottom="12%"))
)

grid.render_notebook()

 

大洲-国家分布

# Sunburst chart for the "continent -> country" section.
# NOTE(review): `data_pair` is not built anywhere in the visible code; it is
# passed straight to Sunburst.add(), so it presumably has to be the nested
# structure that chart expects -- confirm and define it before running.
c = (
    Sunburst(
        init_opts=opts.InitOpts(
            theme='purple-passion',
            width="1000px",
            height="1000px"))
    .add(
        "",
        data_pair=data_pair,
        highlight_policy="ancestor",
        radius=[0, "100%"],
        sort_='null',
        levels=[
            # Level 0 uses the defaults.
            {},
            # First ring: 20%-48% of the radius.
            {
                "r0": "20%",
                "r": "48%",
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 2}
            },
            # Second ring: 50%-80% of the radius, labels right-aligned.
            {"r0": "50%", "r": "80%", "label": {"align": "right"},
             "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 1}}
        ],
    )
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=300, min_=0, is_piecewise=False,
            range_color=['#4285f4', '#34a853', '#fbbc05', '#ea4335', '#ea4335']),
        # Two-line title placed in the middle of the chart; the newlines
        # must be real "\n" escapes to break the line.
        title_opts=opts.TitleOpts(
            title="TOP 1000\n\n大学地理分布",
            pos_left="center",
            pos_top="center",
            title_textstyle_opts=opts.TextStyleOpts(font_style='oblique', font_size=20),))
    .set_series_opts(label_opts=opts.LabelOpts(font_size=14, formatter="{b}: {c}"))
)

c.render_notebook()

 

对于本篇文章有疑问的同学可以加【资料白嫖、解答交流群:753182387】


来源:https://www.cnblogs.com/qshhl/p/16469988.html
本站部分图文来源于网络,如有侵权请联系删除。

未经允许不得转载:百木园 » Python采集1000多所世界大学排名数据,制作可视化图

相关推荐

  • 暂无文章