import pandas as pd
import numpy as np
供应链共同审计是指供应链上多个企业共享同一审计机构或同一审计资源。由于供应链上下游企业之间存在高度的依赖性和协作需求,选择共享审计可以降低审计成本、提高审计效率。此外,相关研究表明,供应链共同审计在减少审计风险与审计费用、降低公司财务重述与提升审计质量、提高资本配置效率以及提升劳动收入份额等方面发挥着积极的影响作用。
供应链共同审计的定义有两种角度,一种是供应链上两家企业年报审计时选择同一家会计师事务所(杨清香,2015),另一种是签字注册会计师为同一人。第一种定义的要求相对更宽松,能获得更多的样本,大多数研究也都选择第一种。本文也选择第一种定义来构造共同审计的样本。
本文选取的样本期间为 2001-2023 年,以披露了前五大客户(供应商)的 A 股上市公司为初始样本。在此基础上,剔除金融行业的样本、剔除交易状态异常的样本、剔除上市未满一年的样本、剔除变量缺失的样本,借鉴杨清香等(2015)的研究,若目标企业在一个会计年度内与其前五大供应商或客户中至少一家聘用同一家会计师事务所,则判定为共同审计。最终获得与上游供应商共同审计 98 个样本,与下游客户共同审计 153 个样本,混合样本 243 个。
前五大供应商数据
数据来源:国泰安-公司研究系列-供应链研究-上市公司供应链信息-前五大供应商信息表,选择剔除 ST、金融业(证监会 2012 版分类)的全部 A 股
- 年份整理:2001-2023
- 剔除客户为非上市公司的样本。非上市公司未披露审计机构相关信息,无法判断是否共同审计
= pd.read_excel(
raw_data "./Data/前五大供应商采购信息表2001-2023/SC_TopFivePurchaseInfo.xlsx",
={"Symbol": str, "BusinessSymbol": str},
dtype )
raw_data.head()
Symbol | EndDate | StateTypeCode | Rank | BusinessInstitutionID | InstitutionName | IsListed | BusinessSymbol | ProportionOfTotalValue | |
---|---|---|---|---|---|---|---|---|---|
0 | 000002 | 2001-12-31 | 1 | 6 | NaN | 合计 | N | NaN | 27.48 |
1 | 000002 | 2002-12-31 | 1 | 6 | NaN | 合计 | N | NaN | 22.67 |
2 | 000002 | 2003-12-31 | 1 | 6 | NaN | 合计 | N | NaN | 26.96 |
3 | 000002 | 2004-12-31 | 1 | 6 | NaN | 合计 | N | NaN | 26.46 |
4 | 000002 | 2005-12-31 | 1 | 6 | NaN | 合计 | N | NaN | 21.70 |
# 选择供应商为上市公司的样本,供应商为上市公司才能获取审计信息
= raw_data.query("IsListed == 'Y'")
data_cleaned_by_supplier # 考虑合并报表
= data_cleaned_by_supplier.query(
data_cleaned_by_stateTypeCode "StateTypeCode == 1"
=True)
).reset_index(drop
= data_cleaned_by_stateTypeCode.loc[
data_cleaned_by_stateTypeCode "Symbol", "EndDate", "BusinessSymbol", "ProportionOfTotalValue"]
:, [ ]
# 个股信息数据
= pd.read_excel(
stock_info "./Data/上市公司基本信息年度表2000-2023/STK_LISTEDCOINFOANL.xlsx",
={"Symbol": str, "PROVINCECODE": str, "CITYCODE": str},
dtype )
stock_info.head()
Symbol | ShortName | EndDate | ListedCoID | IndustryName | IndustryCode | PROVINCECODE | CITYCODE | LISTINGSTATE | |
---|---|---|---|---|---|---|---|---|---|
0 | 000002 | 深万科A | 2000-12-31 | 101775 | 房地产开发与经营业 | J01 | 440000 | 440300 | 正常上市 |
1 | 000002 | 深万科A | 2001-12-31 | 101775 | 房地产开发与经营业 | J01 | 440000 | 440300 | 正常上市 |
2 | 000002 | 万科A | 2002-12-31 | 101775 | 房地产开发与经营业 | J01 | 440000 | 440300 | 正常上市 |
3 | 000002 | 万科A | 2003-12-31 | 101775 | 房地产开发与经营业 | J01 | 440000 | 440300 | 正常上市 |
4 | 000002 | 万科A | 2004-12-31 | 101775 | 房地产开发与经营业 | J01 | 440000 | 440300 | 正常上市 |
= stock_info.loc[:, ["Symbol", "EndDate", "IndustryCode", "LISTINGSTATE"]]
stock_info # 合并 data_cleaned_by_stateTypeCode 与 stock_info 获取行业和上市情况信息
= data_cleaned_by_stateTypeCode.merge(
sample_merged_with_info ="left", on=["Symbol", "EndDate"]
stock_info, how )
sample_merged_with_info.head()
Symbol | EndDate | BusinessSymbol | ProportionOfTotalValue | IndustryCode | LISTINGSTATE | |
---|---|---|---|---|---|---|
0 | 000004 | 2012-12-31 | 301093 | 4.30 | C27 | 正常上市 |
1 | 000004 | 2014-12-31 | 002099 | 2.08 | C27 | 正常上市 |
2 | 000004 | 2016-12-31 | 301093 | 1.09 | C27 | 正常上市 |
3 | 000009 | 2013-12-31 | 600998 | 2.48 | S90 | 正常上市 |
4 | 000010 | 2012-12-31 | 300820 | 8.04 | C35 | 正常上市 |
# 剔除 ST *ST
= sample_merged_with_info.query(
sample_excluded_ST "LISTINGSTATE == '正常上市'"
=True) ).reset_index(drop
# 处理上游供应商的股票代码
# 部分供应商同时在香港和内地上市、发行 B 股、在美国上市等等,BusinessSymbol 代码需要处理
def filter_ticker(ticker):
return len(ticker) == 6
# 查看 BusinessSymbol 非 6 位的样本
= sample_excluded_ST.loc[
business_symbol ~sample_excluded_ST.BusinessSymbol.apply(lambda ticker: filter_ticker(ticker)), :
].BusinessSymbol.unique()
business_symbol
array(['000898;00347', '02318;601318;82318', '00338;600688;SHI', '01606',
'01099', '01066', '000338;02338', '01898;601898', '01072;600875',
'01786', '06869;601869', '000761;200761', '02600;601600;ACH',
'02088', '00570', '688223;JKS', '01171;600188', '002202;02208',
'00857;601857;PTR', '000030;200030', '01727', '01800;601800',
'01033;600871', '00386;600028;SNP', '01133', '000581;200581',
'02607;601607', '02196;600196', '00564;601717', '02170', '04333',
'600819;900918', '000012;200012', 'SGOC', '02899;601899',
'06655;600801', 'CCIH', '00358;600362', 'JASO', '000553;200553',
'03988;04619;601988', '000726;200726', '002594;01211;81211',
'03996;601868', '02727;601727', '600776;900941', 'LDK', '00438',
'01055;600029;ZNH', '00753;601111;AIRC', '00670;600115;CEA',
'600221;900945', '00981;688981;SMI', '01385;688385',
'00728;601728;CHA', '600663;900932', 'JD', '01527', '002460;01772',
'VNET', '5FI', '01053;601005', '00323;600808', '06881;601881',
'870800;920016', '06127;603127', '01088;601088', '000157;01157',
'01354', '01398;04620;601398', '09988;89988;BABA', '00992;80992',
'06066;601066', '01186;601186', '002466;09696', '00568;002490',
'000333;00300', '000756;00719', '06188', '000521;200521', '04335'],
dtype=object)
# 获取 A 股列表
= stock_info.Symbol.unique()
stocks_array
# 如果客户 BusinessSymbol 列中的代码在股票列表中,则返回 True
def filter_customer(symbol):
= symbol.split(";")
symbol_list for s in symbol_list:
if s in stocks_array:
return True
return False
= sample_excluded_ST.loc[
sample_excluded apply(lambda symbol: filter_customer(symbol)), :
sample_excluded_ST.BusinessSymbol.=True) ].reset_index(drop
sample_excluded.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1791 entries, 0 to 1790
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Symbol 1791 non-null object
1 EndDate 1791 non-null object
2 BusinessSymbol 1791 non-null object
3 ProportionOfTotalValue 1773 non-null float64
4 IndustryCode 1791 non-null object
5 LISTINGSTATE 1791 non-null object
dtypes: float64(1), object(5)
memory usage: 84.1+ KB
# 处理 BusinessSymbol
def customer(symbol):
= symbol.split(";")
symbol_list for s in symbol_list:
if s in stocks_array:
return s
"BusinessTicker"] = sample_excluded.BusinessSymbol.apply(
sample_excluded.loc[:, lambda symbol: customer(symbol)
)
= sample_excluded.drop(columns=["BusinessSymbol"]) final
"./Result/上市公司前五大供应商数据.csv", index=False) final.to_csv(
前五大客户数据
= pd.read_excel(
raw_data "./Data/前五大客户销售信息表2001-2023/SC_TopFiveSaleInfo.xlsx",
={"Symbol": str, "BusinessSymbol": str},
dtype )
# 选择客户为上市公司的样本,客户为上市公司才能获取审计信息
= raw_data.query("IsListed == 'Y'")
data_cleaned_by_customer
# 考虑合并报表
= data_cleaned_by_customer.query(
data_cleaned_by_stateTypeCode "StateTypeCode == 1"
=True)
).reset_index(drop
= data_cleaned_by_stateTypeCode.loc[
data_cleaned_by_stateTypeCode "Symbol", "EndDate", "BusinessSymbol", "ProportionOfTotalValue"]
:, [ ]
# 合并 data_cleaned_by_stateTypeCode 与 stock_info 获取行业和上市情况信息
= data_cleaned_by_stateTypeCode.merge(
sample_merged_with_info ="left", on=["Symbol", "EndDate"], validate="m:1"
stock_info, how )
# 剔除 ST *ST
= sample_merged_with_info.query(
sample_excluded_ST "LISTINGSTATE == '正常上市'"
=True) ).reset_index(drop
= sample_excluded_ST.loc[
sample_excluded apply(lambda symbol: filter_customer(symbol)), :
sample_excluded_ST.BusinessSymbol.=True) ].reset_index(drop
"BusinessTicker"] = sample_excluded.BusinessSymbol.apply(
sample_excluded.loc[:, lambda symbol: customer(symbol)
)
= sample_excluded.drop(columns=["BusinessSymbol"])
final "./Result/上市公司前五大客户数据.csv", index=False) final.to_csv(
与上游供应商共同审计
= pd.read_excel(
audit_data "./Data/审计意见表文件1990-2023/FIN_Audit.xlsx",
={"Stkcd": str, "DadtunitID": str},
dtype )
audit_data.rename(={"Stkcd": "Symbol", "Dcost": "auditFee", "DadtunitID": "auditID"},
columns=True,
inplace
)
# 原数据中有半年报的数据,需要剔除
"month"] = (
audit_data.loc[:, str.split("-", expand=True).iloc[:, 1].astype("int64")
audit_data.Accper.
)= (
audit_data_dropna "month == 12").dropna(subset=["auditID"]).reset_index(drop=True)
audit_data.query( )
# 看看是否有重复值
"Symbol", "Accper"]].duplicated(), :] audit_data_dropna.loc[audit_data_dropna.loc[:, [
Symbol | Stknme | Accper | Annodt | Dadtunit | auditFee | auditID | month |
---|
= audit_data_dropna.loc[:, ["Symbol", "Accper", "auditID", "auditFee"]] audit
={"Accper": "EndDate"}, inplace=True)
audit.rename(columns audit.head()
Symbol | EndDate | auditID | auditFee | |
---|---|---|---|---|
0 | 000002 | 1992-12-31 | 107286 | NaN |
1 | 000002 | 1993-12-31 | 107286 | NaN |
2 | 000002 | 1994-12-31 | 107286 | NaN |
3 | 000002 | 1995-12-31 | 107286 | NaN |
4 | 000002 | 1996-12-31 | 107286 | NaN |
= pd.read_csv(
sample_raw "./Result/上市公司前五大供应商数据.csv",
={
dtype"Symbol": str,
"BusinessTicker": str,
}, )
# 给上市公司样本添加审计师信息
= sample_raw.merge(
sample_with_audit ="left", on=["Symbol", "EndDate"], validate="m:1"
audit, how )
# 给供应商添加审计师信息
= sample_with_audit.merge(
data_with_audit
audit,="left",
how=["BusinessTicker", "EndDate"],
left_on=["Symbol", "EndDate"],
right_on=("_sample", "_supplier"),
suffixes )
# 剔除审计师事务所信息缺失的样本
= data_with_audit.dropna(
data_excluded_na ="any", subset=["auditID_sample", "auditID_supplier"], axis=0
how=True) ).reset_index(drop
"audit_common_temp"] = (
data_excluded_na.loc[:, == data_excluded_na.auditID_supplier
data_excluded_na.auditID_sample
)
= data_excluded_na.groupby(["Symbol_sample", "EndDate", "IndustryCode"]).agg(
sample
{"audit_common_temp": "sum",
}
)
=True)
sample.reset_index(inplace"共同审计"] = [0 if i == 0 else 1 for i in sample.audit_common_temp]
sample.loc[:, =["audit_common_temp"], inplace=True) sample.drop(columns
sample.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1188 entries, 0 to 1187
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Symbol_sample 1188 non-null object
1 EndDate 1188 non-null object
2 IndustryCode 1188 non-null object
3 共同审计 1188 non-null int64
dtypes: int64(1), object(3)
memory usage: 37.2+ KB
sum(sample.loc[:, "共同审计"])
98
"./Result/与上游供应商共同审计.csv", index=False) sample.to_csv(
与下游客户共同审计
= pd.read_csv(
sample_raw "./Result/上市公司前五大客户数据.csv",
={
dtype"Symbol": str,
"BusinessTicker": str,
}, )
# 给上市公司样本添加审计师信息
= sample_raw.merge(
sample_with_audit ="left", on=["Symbol", "EndDate"], validate="m:1"
audit, how
)
# 给客户添加审计师信息
= sample_with_audit.merge(
data_with_audit
audit,="left",
how=["BusinessTicker", "EndDate"],
left_on=["Symbol", "EndDate"],
right_on=("_sample", "_customer"),
suffixes )
# 剔除审计师事务所信息缺失的样本
= data_with_audit.dropna(
data_excluded_na ="any", subset=["auditID_sample", "auditID_customer"], axis=0
how=True)
).reset_index(drop
"audit_common_temp"] = (
data_excluded_na.loc[:, == data_excluded_na.auditID_customer
data_excluded_na.auditID_sample
)
= data_excluded_na.groupby(["Symbol_sample", "EndDate", "IndustryCode"]).agg(
sample
{"audit_common_temp": "sum",
} )
=True)
sample.reset_index(inplace"共同审计"] = [0 if i == 0 else 1 for i in sample.audit_common_temp]
sample.loc[:, =["audit_common_temp"], inplace=True) sample.drop(columns
sample.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1576 entries, 0 to 1575
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Symbol_sample 1576 non-null object
1 EndDate 1576 non-null object
2 IndustryCode 1576 non-null object
3 共同审计 1576 non-null int64
dtypes: int64(1), object(3)
memory usage: 49.4+ KB
sum(sample.loc[:, "共同审计"])
153
"./Result/与下游客户共同审计.csv", index=False) sample.to_csv(
混合样本
= pd.read_csv(
sales_sample "./Result/与下游客户共同审计.csv", dtype={"Symbol_sample": str}
)= pd.read_csv(
supplier_sample "./Result/与上游供应商共同审计.csv", dtype={"Symbol_sample": str}
)
# 将与上游供应商共同审计的样本和下游客户共同审计的样本合并在一起,涉及到一个问题:有的样本既与上游供应商共同审计,又与下游客户共同审计。
= sales_sample.merge(
sample
supplier_sample,="outer",
how=["Symbol_sample", "EndDate", "IndustryCode"],
on=["_customer", "_supplier"],
suffixes
)
0, inplace=True) sample.fillna(
sample
Symbol_sample | EndDate | IndustryCode | 共同审计_customer | 共同审计_supplier | |
---|---|---|---|---|---|
0 | 000004 | 2014-12-31 | C27 | 0.0 | 0.0 |
1 | 000006 | 2014-12-31 | K70 | 0.0 | 0.0 |
2 | 000006 | 2015-12-31 | K70 | 1.0 | 0.0 |
3 | 000007 | 2017-12-31 | K70 | 0.0 | 0.0 |
4 | 000007 | 2018-12-31 | K70 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... |
2552 | 873223 | 2023-12-31 | C34 | 0.0 | 0.0 |
2553 | 873305 | 2022-12-31 | C33 | 0.0 | 0.0 |
2554 | 873305 | 2023-12-31 | C33 | 0.0 | 0.0 |
2555 | 873593 | 2023-12-31 | C38 | 0.0 | 0.0 |
2556 | 873726 | 2023-12-31 | C35 | 0.0 | 0.0 |
2557 rows × 5 columns
"auditCom_temp"] = (
sample.loc[:, "共同审计_customer"] + sample.loc[:, "共同审计_supplier"]
sample.loc[:,
)= sample.loc[:, ["Symbol_sample", "EndDate", "IndustryCode", "auditCom_temp"]] sample
"auditCom"] = [0 if i == 0 else 1 for i in sample.auditCom_temp]
sample.loc[:, "auditCom == 1") sample.query(
Symbol_sample | EndDate | IndustryCode | auditCom_temp | auditCom | |
---|---|---|---|---|---|
2 | 000006 | 2015-12-31 | K70 | 1.0 | 1 |
5 | 000009 | 2012-12-31 | S90 | 1.0 | 1 |
6 | 000009 | 2013-12-31 | S90 | 2.0 | 1 |
7 | 000009 | 2014-12-31 | S90 | 1.0 | 1 |
12 | 000011 | 2019-12-31 | K70 | 1.0 | 1 |
... | ... | ... | ... | ... | ... |
2409 | 688737 | 2021-12-31 | C26 | 1.0 | 1 |
2441 | 831961 | 2022-12-31 | C40 | 1.0 | 1 |
2467 | 833580 | 2023-12-31 | C30 | 1.0 | 1 |
2493 | 835892 | 2023-12-31 | C35 | 1.0 | 1 |
2495 | 835985 | 2023-12-31 | C38 | 1.0 | 1 |
243 rows × 5 columns
= sample.drop(columns=["auditCom_temp"])
final "./Result/混合样本.csv", index=False) final.to_csv(