整理Excel文档里的链接至csv文档

import pandas as pd
import openpyxl
import re

# Workbook whose column C is scanned for ="HYPERLINK(url, title)" style cells.
xlsx = '水电及其他电.xlsx'

df = pd.read_excel(xlsx)

# The workbook is opened a second time with openpyxl to read the raw cell
# values of column C.
# NOTE(review): presumably these are the formula strings rather than the
# displayed text — confirm against the workbook.
exf = openpyxl.load_workbook(xlsx)
sheet = exf.active
C2 = sheet['C2']
C = sheet['C']

links = [c.value for c in C]

# Drop the first and last cells of the column, and keep only string cells:
# ''.join() raises TypeError on empty (None) or numeric cells.
links_1 = [value for value in links[1:-1] if isinstance(value, str)]
links_2 = ''.join(links_1)

# Example of the cell format the pattern below is written for.
sample  = '=HYPERLINK("http://news.windin.com/ns/bulletin.php?code=2B41260EA8F3&id=123597160&type=1","方正证券:2020年年度报告")'

# Each match yields a (first quoted string, second quoted string) pair, which
# for cells shaped like `sample` is (url, title).
p = re.compile(r'"(.*?)","(.*?)"')
list_of_tuple = p.findall(links_2)

df2 = pd.DataFrame(list_of_tuple, columns=['link', 'f_name'])

df2.to_csv('水电及其他.csv')

筛选并整理出要用的年报链接

import re 
import pandas as pd
import os

# Load the link table; the context manager closes the file handle once
# pandas has finished reading it.
with open('水电及其他.csv', encoding='utf-8') as m:
    df = pd.read_csv(m)

# Normalise titles written as "<4 digits>年度报告" to "<4 digits>年年度报告".
# The lookbehind keeps the digits in place and only the "年度报告" suffix is
# replaced; titles already in the "年年度报告" form do not match.
p = re.compile(r'(?<=\d{4})年度报告')
f_names = [p.sub('年年度报告', f) for f in df.f_name]
df['f_name'] = f_names; del p,f_names

def filter_links(words,df,include=True):
    """Select the rows of *df* whose ``f_name`` contains (or avoids) *words*.

    Parameters
    ----------
    words : iterable of str
        Substrings to look for in each ``f_name`` value.
    df : pandas.DataFrame
        Table with an ``f_name`` column of strings.
    include : bool, default True
        ``True`` keeps rows whose name contains at least one of *words*;
        ``False`` keeps rows whose name contains none of them.  A one-element
        list or tuple such as ``[False]`` is unwrapped to its element, because
        a non-empty list is always truthy and would otherwise silently turn
        an exclusion into an inclusion.

    Returns
    -------
    pandas.DataFrame
        The matching rows in their original order, with all columns kept
        (also when nothing matches or *df* is empty).
    """
    if isinstance(include, (list, tuple)) and len(include) == 1:
        include = include[0]
    include = bool(include)
    words = list(words)

    keep = []
    for pos, f in enumerate(df.f_name):
        hit = any(word in f for word in words)
        # include: keep on a hit; exclude: keep when no word is present.
        if hit == include:
            keep.append(pos)

    # Positional selection keeps the column layout even when `keep` is empty.
    df2 = df.iloc[keep]
    return df2
                
# Four successive filter passes over the link table.
# NOTE(review): the flag is passed as a one-element list; a non-empty list is
# truthy in Python, so confirm that filter_links interprets [False] as an
# exclusion rather than as an inclusion.
df_all = filter_links(
    words=['摘要','问询函','社会责任','审计','财务','风险','债券'],
    df=df,
    include=[False],
)
# Split on the presence of an opening parenthesis in the title.
df_orig = filter_links(words=['(','('], df=df_all, include=[False])
df_updt = filter_links(words=['(','('], df=df_all, include=[True])
# Further pass over the parenthesised titles using the word 取消.
df_updt = filter_links(words=['取消'], df=df_updt, include=[False])

def sub_with_update(df_updt,df_orig):
    """Return a copy of *df_orig* with links replaced by their updated versions.

    A row of *df_updt* is treated as the update of a row of *df_orig* when the
    original ``f_name`` is a substring of the updated ``f_name``.  For every
    such pair the value in the second-to-last column of the original row is
    replaced by the value in the second-to-last column of the updated row.
    NOTE(review): the second-to-last column is presumably the link column —
    confirm against the frames passed in.

    When several updated rows match one original row, the last match in
    *df_updt* order wins.  Neither input frame is modified.

    Parameters
    ----------
    df_updt : pandas.DataFrame
        Rows describing updated reports; needs an ``f_name`` column.
    df_orig : pandas.DataFrame
        Rows describing the original reports; needs an ``f_name`` column.

    Returns
    -------
    pandas.DataFrame
        The patched copy of *df_orig*.
    """
    df_newest = df_orig.copy()
    for i, f in enumerate(df_orig.f_name):
        for j, fn in enumerate(df_updt.f_name):
            if f in fn:
                # Write into the copy, not into the caller's frame.
                df_newest.iloc[i, -2] = df_updt.iloc[j, -2]
    return df_newest

# Merge updated links into the original rows.
df_newest = sub_with_update(df_updt, df_orig)
# NOTE(review): this sorts df_all, which is not read again in this cell —
# confirm whether df_newest was the intended target.
df_all.sort_values(by=['f_name'], inplace=True)

# The first four characters of each title are used as the company short name.
df_newest['公司简称'] = [title[:4] for title in df_newest.f_name]
counts = df_newest['公司简称'].value_counts()

# One filtered frame for each of the first ten entries of the count index.
ten_company = [filter_links([cn], df_newest) for cn in counts.index[:10]]

# Create the output folder on first use.
out_dir = '10companies'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Write one CSV per company, named after its short name.
for df_com in ten_company:
    cn = df_com['公司简称'].iloc[0]
    df_com.to_csv('10companies/%s.csv' % cn)

# Show what ended up in the output folder.
xm = os.listdir(out_dir)
print(xm)

['600236 ', '600505 ', '600674 ', '600900 ', '凯迪生态.csv', '川投能源.csv', '桂冠电力.csv', '梅雁吉祥.csv', '湖南发展.csv', '甘肃电投.csv', '西昌电力.csv', '长江电力.csv', '闽东电力.csv', '韶能股份.csv', '黔源电力.csv']

C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:57: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

整合这些链接至一个文档并进行下载

import re
import requests
import pandas as pd
import time
import os

# Work inside the folder that holds the per-company CSV files.
os.chdir('/Users/21954/Desktop/新建文件夹/10companies')
fil=os.listdir()
# NOTE(review): drops the third directory entry by position; os.listdir()
# makes no ordering promise, so confirm which entry this is meant to skip.
fil.remove(fil[2])

links= []
f_names=[]

# Gather every link and title from the remaining files, in listing order.
for dfil in fil:
    # The context manager closes each file as soon as pandas has read it.
    with open(dfil,encoding='utf-8') as m:
        fmo = pd.read_csv(m)
    links.extend(fmo['link'])
    f_names.extend(fmo['f_name'])

def get_PDF_url(url):
    """Fetch *url* and return the download link found on that page.

    The page is searched for its first ``<a href=...>...</a>`` element.

    Parameters
    ----------
    url : str
        Address of the page to fetch.

    Returns
    -------
    tuple
        ``(href, fname)`` where ``href`` is the first 26 characters of the
        final response URL followed by the anchor's href, and ``fname`` is the
        anchor text with surrounding whitespace stripped.  An empty tuple
        ``()`` is returned, after a warning is issued, when the page contains
        no matching anchor.
    """
    import warnings

    # The timeout keeps one stalled request from blocking the whole run.
    r = requests.get(url, timeout=30)
    try:
        r.encoding = 'utf-8'
        html = r.text
        # NOTE(review): presumably the scheme, host and directory part of the
        # page address — confirm the 26-character cut for every source URL.
        base = r.url[:26]
    finally:
        r.close()

    p = re.compile(r'<a href=(.*?)\s.*?>(.*?)</a>', re.DOTALL)
    a = p.search(html)
    if a is None:
        # Constructing Warning(...) alone emits nothing; warn() reports it.
        warnings.warn('没有找到下载链接。请手动检查链接：%s' % url)
        return ()

    href = base + a.group(1)
    fname = a.group(2).strip()
    return (href, fname)

# Resolve every bulletin page to its download link.
hrefs=[];fnames=[]
for link in links:
    result = get_PDF_url(link)
    # Pause after every request, whether or not a link was found.
    time.sleep(10)
    if not result:
        # get_PDF_url returns an empty tuple when no link was found; skip the
        # page instead of failing on the tuple unpacking.
        continue
    href,fname = result
    hrefs.append(href)
    fnames.append(fname)
    # Rewritten after every page so an interrupted run keeps its progress.
    # The column is named 'f_name' and the file is UTF-8 so that the
    # read-back below finds what was written here.
    df_final_links=pd.DataFrame({'href':hrefs,'f_name':fnames})
    df_final_links.to_csv('final_links水电.csv',encoding='utf-8')

# Read the link table back and download each file under its listed name.
with open('final_links水电.csv',encoding='utf-8') as m1:
    df_final_links=pd.read_csv(m1)
f_names=df_final_links['f_name']
hrefs=df_final_links['href']
for href,f_name in zip(hrefs,f_names):
    r = requests.get(href, allow_redirects=True)
    try:
        # The context manager flushes and closes each downloaded file.
        with open('%s' %f_name, 'wb') as out:
            out.write(r.content)
    finally:
        # Release every response, not only the last one.
        r.close()
    time.sleep(10)

读取PDF内容

import pdfplumber
import os
# The annual reports are sorted into one folder per company and read in a
# single pass; 黔源电力 is used as the example here.
os.chdir('/Users/21954/Desktop/新建文件夹/黔源电力')
f_1 = os.listdir()
for file_path in f_1:
    with pdfplumber.open(file_path) as pdf:
        # Only the page at index 4 of each file is inspected.
        page = pdf.pages[4]
        print(page.extract_table())
        tables = page.extract_tables()
        for table in tables:
            print(table)
            # First row of the table (its header row, where one is present).
            print(table[0])

[['', '2010年', '2009年', '本年比上年增减（％）', '2008年'], ['基本每股收益（元/股）（注）', '0.4647', '0.24', '93.63', '0.7275'], ['稀释每股收益（元/股）（注）', '0.4647', '0.24', '93.63', '0.7275'], ['扣除非经常性损益后的基本每股\n收益（元/股）（注）', '0.2373', '-0.3356', '170.71', '0.7461'], ['加权平均净资产收益率（%）', '10.57', '5.82', '增加4.75个百分点', '19.02'], ['扣除非经常性损益后的加权平均\n净资产收益率（%）', '5.40', '-8.14', '增加13.54个百分点', '19.51'], ['每股经营活动产生的现金流量净\n额（元/股）（注）', '8.5695', '3.9993', '114.28', '4.1161'], ['', '2010年末', '2009年末', '本年末比上年末增减\n（％）', '2008年末'], ['归属于上市公司股东的每股净资\n产（元/股）（注）', '8.3499', '4.2689', '95.60', '4.0894']]
[['', '2010年', '2009年', '本年比上年增减（％）', '2008年'], ['基本每股收益（元/股）（注）', '0.4647', '0.24', '93.63', '0.7275'], ['稀释每股收益（元/股）（注）', '0.4647', '0.24', '93.63', '0.7275'], ['扣除非经常性损益后的基本每股\n收益（元/股）（注）', '0.2373', '-0.3356', '170.71', '0.7461'], ['加权平均净资产收益率（%）', '10.57', '5.82', '增加4.75个百分点', '19.02'], ['扣除非经常性损益后的加权平均\n净资产收益率（%）', '5.40', '-8.14', '增加13.54个百分点', '19.51'], ['每股经营活动产生的现金流量净\n额（元/股）（注）', '8.5695', '3.9993', '114.28', '4.1161'], ['', '2010年末', '2009年末', '本年末比上年末增减\n（％）', '2008年末'], ['归属于上市公司股东的每股净资\n产（元/股）（注）', '8.3499', '4.2689', '95.60', '4.0894']]
['', '2010年', '2009年', '本年比上年增减（％）', '2008年']
[['分红年度', '现金分红金额 \n（含税）', '分红年度合并报表中归属于\n上市公司股东的净利润', '占合并报表中归属于上市公司\n股东的净利润的比率'], ['2009年', '15,596,467.20', '33,655,666.96', '46.34%'], ['2008年', '21,038,400.00', '102,036,014.13', '20.62%'], ['2007年', '14,025,600.00', '4,740,959.63', '295.84%'], ['最近三年累计现金分红金额占最近年均净\n利润的比例（%）', None, '108.22', None]]
['分红年度', '现金分红金额 \n（含税）', '分红年度合并报表中归属于\n上市公司股东的净利润', '占合并报表中归属于上市公司\n股东的净利润的比率']
[['项目', '股本', '资本公积', '盈余公积', '法定盈余公积', '未分配利润', '股东权益合计'], ['期初数', '140,256,000', '236,621,451.72', '62,828,296.89', '62,778,296.89', '159,035,350.39', '598,741,099.00'], ['本期增加', '63,343,108', '994,726,883.40', '5,643,011.71', '5,643,011.71', '65,170,612.52', '1,128,883,615.63'], ['本期减少', '0', '6,351,311.74', '', '', '21,239,478.91', '27,590,790.65'], ['期末数', '203,599,108', '1,224,997,023.38', '68,471,308.60', '68,421,308.60', '202,966,484.00', '1,700,033,923.98']]
['项目', '股本', '资本公积', '盈余公积', '法定盈余公积', '未分配利润', '股东权益合计']
[['', '', '', '本年比上年增减', ''], ['', '2011年', '2010年', None, '2009年'], [None, None, None, '（％）', None], ['', '', '', None, ''], ['基本每股收益（元/股）', '-0.4142', '0.4647', '-189.13', '0.24'], ['稀释每股收益（元/股）', '-0.4142', '0.4647', '-189.13', '0.24'], ['扣除非经常性损益后的\n基本每股收益（元/股）', '', '', '', ''], [None, '-0.5223', '0.2373', '-320.10', '-0.3356'], [None, '', '', '', ''], ['加权平均净资产收益率\n（％）', '', '', '同比减少15.73个', ''], [None, '-5.16', '10.57', None, '5.82'], [None, None, None, '百分点', None], [None, '', '', None, ''], ['扣除非经常性损益后的\n加权平均净资产收益率\n（％）', '', '', '', ''], [None, None, None, '同比减少11.90个', None], [None, '-6.50', '5.40', None, '-8.14'], [None, None, None, '百分点', None], [None, '', '', None, ''], [None, None, None, '', None], ['每股经营活动产生的现\n金流量净额（元/股）', '', '', '', ''], [None, '4.648', '5.9034', '-21.27', '3.9993'], [None, '', '', '', ''], ['', '', '', '本年末比上年末增', ''], ['', '2011年末', '2010年末', None, '2009年末'], [None, None, None, '减（％）', None], ['', '', '', None, ''], ['归属于上市公司股东的\n每股净资产（元/股）', '', '', '', ''], [None, '7.769', '8.3499', '-6.96', '4.2689'], [None, '', '', '', ''], ['', '', '', '同比减少1.81个', ''], ['资产负债率（％）', '79.12', '80.93', None, '89.56'], [None, None, None, '百分点', None], ['', '', '', None, '']]
[['', '2011年', '2010年', '本年比上年增减（％）', '2009年'], ['营业总收入', '976,872,950.64', '1,445,658,601.50', '-32.43', '816,614,818.52'], ['营业利润', '-122,447,962.23', '144,614,450.85', '-184.67', '38,234,987.81'], ['利润总额', '-119,659,385.07', '146,506,056.81', '-181.68', '40,513,139.67'], ['归属于上市公司股', '', '', '', ''], [None, '-84,337,318.91', '65,170,612.52', '-229.41', '33,655,666.96'], ['东的净利润', None, None, None, None], [None, '', '', '', ''], ['归属于上市公司股', '', '', '', ''], ['东的扣除非经常性', '-106,331,253.93', '33,288,009.49', '-419.43', '-47,075,376.08'], ['损益的净利润', '', '', '', ''], ['经营活动产生的现', '', '', '', ''], [None, '946,337,151.77', '1,201,928,805.39', '-21.27', '560,926,114.11'], ['金流量净额', None, None, None, None], [None, '', '', '', ''], ['', '', '', '本年末比上年末增减', ''], ['', '2011年末', '2010年末', None, '2009年末'], [None, None, None, '（％）', None], ['', '', '', None, ''], ['资产总额', '15,508,264,567.02', '15,943,419,938.18', '-2.73', '14,808,941,654.46'], ['负债总额', '12,270,770,186.51', '12,903,781,805.03', '-4.91', '13,263,088,688.01'], ['归属于上市公司股', '', '', '', ''], [None, '1,581,756,633.77', '1,700,033,923.98', '-6.96', '598,741,099.00'], ['东的所有者权益', None, None, None, None], [None, '', '', '', ''], ['总股本', '203,599,108.00', '203,599,108.00', '0.00', '140,256,000.00']]
['', '2011年', '2010年', '本年比上年增减（％）', '2009年']
[['', '', '', '本年比上年增减', ''], ['', '2011年', '2010年', None, '2009年'], [None, None, None, '（％）', None], ['', '', '', None, ''], ['基本每股收益（元/股）', '-0.4142', '0.4647', '-189.13', '0.24'], ['稀释每股收益（元/股）', '-0.4142', '0.4647', '-189.13', '0.24'], ['扣除非经常性损益后的\n基本每股收益（元/股）', '', '', '', ''], [None, '-0.5223', '0.2373', '-320.10', '-0.3356'], [None, '', '', '', ''], ['加权平均净资产收益率\n（％）', '', '', '同比减少15.73个', ''], [None, '-5.16', '10.57', None, '5.82'], [None, None, None, '百分点', None], [None, '', '', None, ''], ['扣除非经常性损益后的\n加权平均净资产收益率\n（％）', '', '', '', ''], [None, None, None, '同比减少11.90个', None], [None, '-6.50', '5.40', None, '-8.14'], [None, None, None, '百分点', None], [None, '', '', None, ''], [None, None, None, '', None], ['每股经营活动产生的现\n金流量净额（元/股）', '', '', '', ''], [None, '4.648', '5.9034', '-21.27', '3.9993'], [None, '', '', '', ''], ['', '', '', '本年末比上年末增', ''], ['', '2011年末', '2010年末', None, '2009年末'], [None, None, None, '减（％）', None], ['', '', '', None, ''], ['归属于上市公司股东的\n每股净资产（元/股）', '', '', '', ''], [None, '7.769', '8.3499', '-6.96', '4.2689'], [None, '', '', '', ''], ['', '', '', '同比减少1.81个', ''], ['资产负债率（％）', '79.12', '80.93', None, '89.56'], [None, None, None, '百分点', None], ['', '', '', None, '']]
['', '', '', '本年比上年增减', '']
None
None
None
[['股票简称', '黔源电力', '股票代码', '002039'], ['变更后的股票简称（如有）', '无', None, None], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['变更后的股票简称（如有）', '无', None, None], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '刘明达', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218803', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'liumd@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》']
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '刘明达', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218803', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'liumd@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》']
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵阳市都司高架桥路46号', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵阳市都司高架桥路46号', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '刘靖', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218810', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'liujing@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》、《证券时报》']
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '刘靖', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '杨焱', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218808', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'yangyan@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》']
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '陶云鹏', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '陶云鹏', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '杨焱', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218808', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'yangyan@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》']
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '罗涛', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
[['股票简称', '黔源电力', '股票代码', '002039'], ['股票上市证券交易所', '深圳证券交易所', None, None], ['公司的中文名称', '贵州黔源电力股份有限公司', None, None], ['公司的中文简称', '黔源电力', None, None], ['公司的外文名称（如有）', 'GuiZhou QianYuan Power Co., Ltd.', None, None], ['公司的外文名称缩写（如有）', 'QYDL', None, None], ['公司的法定代表人', '罗涛', None, None], ['注册地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['注册地址的邮政编码', '550002', None, None], ['办公地址', '贵州省贵阳市南明区都司高架桥路46号（黔源大厦）', None, None], ['办公地址的邮政编码', '550002', None, None], ['公司网址', 'www.gzqydl.cn', None, None], ['电子信箱', 'qydl@gzqydl.cn', None, None]]
['股票简称', '黔源电力', '股票代码', '002039']
[['', '董事会秘书', '证券事务代表'], ['姓名', '杨焱', '石海宏'], ['联系地址', '贵阳市都司高架桥路46号黔源大厦', '贵阳市都司高架桥路46号黔源大厦'], ['电话', '0851-85218808', '0851-85218944'], ['传真', '0851-85218925', '0851-85218925'], ['电子信箱', 'yangyan@gzqydl.cn', 'shihh@gzqydl.cn']]
['', '董事会秘书', '证券事务代表']
[['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》'], ['登载年度报告的中国证监会指定网站的网址', 'www.cninfo.com.cn'], ['公司年度报告备置地点', '公司证券管理部']]
['公司选定的信息披露媒体的名称', '《中国证券报》《证券时报》']

筛选出要用的数据并绘制图表

这里着重对各个公司的营业收入和经营活动产生的现金流量净额进行可视化分析。西昌电力和梅雁吉祥因数据缺失、年报无法下载，暂不做分析。

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('桂冠电力.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('桂冠电力', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('川投能源.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('川投能源', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('长江电力.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('长江电力', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('甘肃电投.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('甘肃电投', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('湖南发展.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('湖南发展', fontsize=20)
plt.legend()
plt.show()

湖南发展最新一年的经营活动产生的现金流量净额为负值，上图的走势正是由此造成的。受流域降雨减少、合并报表范围变更、新冠疫情等综合因素影响，公司实现营业收入31,088.59万元，同比增长27.48%；实现归属于上市公司股东的净利润9,787.99万元，同比减少29.25%。可见公司受疫情和气候的影响较为严重。

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('闽东电力.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('闽东电力', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('韶能股份.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('韶能股份', fontsize=20)
plt.legend()
plt.show()

import xlrd
import matplotlib.pyplot as plt
import os

# Switch to the folder that holds the per-company workbooks.
os.chdir('/Users/21954/Desktop/新建文件夹')
# SimHei is selected for the Chinese labels; axes.unicode_minus is switched
# off alongside it.
plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Open the workbook and take its first sheet.
# NOTE(review): recent xlrd releases reportedly no longer read .xlsx files —
# confirm the installed version can open this workbook.
data = xlrd.open_workbook('黔源电力.xlsx')
table = data.sheets()[0]

# Both series are drawn against the years 2013-2019.
xAxis1 = xAxis2 = range(2013, 2020)
# Rows 2-8 of column 1 feed the first line, rows 2-8 of column 2 the second.
tt = table.col_values(1)[2:9]
tu = table.col_values(2)[2:9]

# Draw both lines on a single figure.
plt.figure(figsize=(15, 8))
for x_values, y_values, legend_text in (
    (xAxis1, tt, '营业收入'),
    (xAxis2, tu, '经营活动产生的现金流量净额'),
):
    plt.plot(x_values, y_values, label=legend_text)

plt.xlabel('年份')
plt.ylabel('金额数值', fontsize=15)
plt.title('黔源电力', fontsize=20)
plt.legend()
plt.show()

每年度数值横向分析

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
# Select the SimHei font family for the Chinese labels.
rcParams['font.family'] = 'simhei'
people = pd.read_excel('历年营业收入.xlsx')

# One bar chart per year from 2013 to 2020: company name on the x axis and
# that year's revenue column on the y axis.
for year in range(2013, 2021):
    people.plot.bar(x='公司名称', y='%d年营业收入' % year, color='blue')
    plt.xticks(rotation=360)
    plt.show()

从年报数据来看，除了背靠三峡集团的长江电力在气候和疫情多变的情况下保持了稳步增长外，其他纳入分析的公司均有不同程度的折损，尤其是闽东电力和湖南发展——其所在区域的水电开发资源已趋于枯竭。与此同时，全国能源工作会议指出，要着力提高能源供给水平，加快风电光伏发展，稳步推进水电核电建设，大力提升新能源消纳和储存能力，深入推进煤炭清洁高效开发利用，进一步优化完善电网建设。这对于这些区域条件受限的公司来说是很大的挑战。从年报数据上看，除营收下降之外，减少派息、投资其他产业等，都是艰难情况下不得已而为之的必要手段。