# API key handed to XbrlApi further down; replace the placeholder with a real key.
API_KEY = 'YOUR_API_KEY'

!pip install -q sec-api

from sec_api import XbrlApi

# client used for every XBRL-to-JSON conversion in this notebook
xbrlApi = XbrlApi(API_KEY)

# URL of Apple's 10-K filing
url_10k_aapl = 'https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/aapl-20220924.htm'

# convert the XBRL data of the filing into a dictionary of financial statements
aapl_xbrl_json = xbrlApi.xbrl_to_json(htm_url=url_10k_aapl)

print("Keys of income statement dictionary in XBRL from Apple's 10-K filing")
print('--------------------------------------------------------------------')
# one key per line; with an empty separator the item names run together
print(*aapl_xbrl_json['StatementsOfIncome'].keys(), sep='\n')

Keys of income statement dictionary in XBRL from Apple's 10-K filing
--------------------------------------------------------------------
RevenueFromContractWithCustomerExcludingAssessedTax
CostOfGoodsAndServicesSold
GrossProfit
ResearchAndDevelopmentExpense
SellingGeneralAndAdministrativeExpense
OperatingExpenses
OperatingIncomeLoss
NonoperatingIncomeExpense
IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest
IncomeTaxExpenseBenefit
NetIncomeLoss
EarningsPerShareBasic
EarningsPerShareDiluted
WeightedAverageNumberOfSharesOutstandingBasic
WeightedAverageNumberOfDilutedSharesOutstanding

import pandas as pd


# convert XBRL-JSON of income statement to pandas dataframe
def get_income_statement(xbrl_json):
    """Convert the income statement of an XBRL-JSON document to a dataframe.

    Args:
        xbrl_json: mapping with a 'StatementsOfIncome' entry that maps each
            US GAAP item name to a list of facts. Every fact without a
            'segment' key must provide 'value' and a 'period' holding
            'startDate' and 'endDate'.

    Returns:
        pandas.DataFrame with one row per US GAAP item and one column per
        'startDate-endDate' period.

    Raises:
        KeyError: if 'StatementsOfIncome' or one of the fact fields named
            above is missing.
    """
    income_statement_store = {}

    # iterate over each US GAAP item in the income statement
    for us_gaap_item, facts in xbrl_json['StatementsOfIncome'].items():
        # period label -> value; dicts keep insertion order, so the series
        # below lists periods in the order they were first seen
        value_by_period = {}

        for fact in facts:
            # only consider items without segment. not required for our analysis.
            if 'segment' in fact:
                continue

            period = fact['period']
            index = period['startDate'] + '-' + period['endDate']

            # ensure no index duplicates are created: the first fact wins
            if index not in value_by_period:
                value_by_period[index] = fact['value']

        income_statement_store[us_gaap_item] = pd.Series(
            list(value_by_period.values()),
            index=list(value_by_period),
        )

    income_statement = pd.DataFrame(income_statement_store)

    # switch columns and rows so that US GAAP items are rows and each column
    # header represents a date range
    return income_statement.T


income_statement_aapl = get_income_statement(aapl_xbrl_json)

print("Income statement of Apple's 2022 10-K filing as dataframe")
print('---------------------------------------------------------')
income_statement_aapl

Income statement of Apple's 2022 10-K filing as dataframe
---------------------------------------------------------

url_10k_aapl = 'https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/aapl-20220924.htm'

# fetch the filing's XBRL data and turn its income statement into a dataframe
xbrl_json_aapl = xbrlApi.xbrl_to_json(htm_url=url_10k_aapl)
income_statement_aapl = get_income_statement(xbrl_json_aapl)

# write the dataframe to an Excel workbook in the working directory
# NOTE(review): pandas needs an Excel writer engine (e.g. openpyxl) for .xlsx - confirm it is installed
income_statement_aapl.to_excel('income-statement-aapl.xlsx')

# Apple's 10Ks of the last 4 years, 2019 to 2022
url_10k_2019 = 'https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/a10-k20199282019.htm'
url_10k_2020 = 'https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm'
url_10k_2021 = 'https://www.sec.gov/Archives/edgar/data/320193/000032019321000105/aapl-20210925.htm'
url_10k_2022 = 'https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/aapl-20220924.htm'

# one XBRL-to-JSON conversion per filing
xbrl_json_2019 = xbrlApi.xbrl_to_json(htm_url=url_10k_2019)
xbrl_json_2020 = xbrlApi.xbrl_to_json(htm_url=url_10k_2020)
xbrl_json_2021 = xbrlApi.xbrl_to_json(htm_url=url_10k_2021)
xbrl_json_2022 = xbrlApi.xbrl_to_json(htm_url=url_10k_2022)

# one income statement dataframe per filing (rows: US GAAP items, columns: periods)
income_statement_2019 = get_income_statement(xbrl_json_2019)
income_statement_2020 = get_income_statement(xbrl_json_2020)
income_statement_2021 = get_income_statement(xbrl_json_2021)
income_statement_2022 = get_income_statement(xbrl_json_2022)

# stack the four statements on top of each other; every US GAAP item then
# appears once per filing, with NaN in the periods that filing does not cover
income_statements_merged = pd.concat(
    [income_statement_2019, income_statement_2020, income_statement_2021, income_statement_2022],
    axis=0,
    sort=False,
)

# sort & reset the index of the merged dataframe
# (the item names move into a regular column called 'index')
income_statements_merged = income_statements_merged.sort_index().reset_index()


def _to_number(cell):
    """Return *cell* as a number when it parses as one, otherwise unchanged."""
    try:
        return pd.to_numeric(cell)
    except (ValueError, TypeError):
        # e.g. the US GAAP item names in the 'index' column
        return cell


# convert cells to float
# (per-column Series.map instead of DataFrame.applymap with errors='ignore',
# both of which are deprecated in recent pandas releases)
income_statements_merged = income_statements_merged.apply(lambda column: column.map(_to_number))

print("Merged, uncleaned financials of all income statements")
print('-----------------------------------------------------')
income_statements_merged.head(10)

Merged, uncleaned financials of all income statements
-----------------------------------------------------

# collapse the per-filing rows of each US GAAP item into a single row;
# max() skips NaN, so each period keeps the value of the filing that reports it
# NOTE(review): when filings disagree for the same period, the larger value is kept - confirm this is intended
income_statements = income_statements_merged.groupby('index').max()

# reindex the merged dataframe using the index of the first dataframe
income_statements = income_statements.reindex(income_statement_2019.index)

# collect the columns that cover less than a year
# ('index' became the row index in the groupby above, so every column is a
# period label and none may be skipped)
short_period_columns = []
for col in income_statements.columns:
    # extract start and end dates from the column label,
    # which has the form 'YYYY-MM-DD-YYYY-MM-DD'
    splitted = col.split('-')
    start = '-'.join(splitted[:3])
    end = '-'.join(splitted[3:])

    # convert start and end dates to datetime objects
    start_date = pd.to_datetime(start)
    end_date = pd.to_datetime(end)

    # calculate the duration between start and end dates in 360-day years
    duration = (end_date - start_date).days / 360

    # remember the column if duration is less than a year
    if duration < 1:
        short_period_columns.append(col)

# drop all short periods in one step
income_statements = income_statements.drop(columns=short_period_columns)

# convert datatype of cells to readable format, e.g. "2.235460e+11" becomes "223546000000"
income_statements = income_statements.apply(
    lambda row: pd.to_numeric(row, errors='coerce', downcast='integer').astype(str),
    axis=1,
)

print("Income statements from Apple's 10-K filings (2016 to 2022) as dataframe")
print('------------------------------------------------------------------------')
income_statements

Income statements from Apple's 10-K filings (2016 to 2022) as dataframe
------------------------------------------------------------------------

# revenue facts (with and without segment) of all four filings in one list
all_revenues_json = (
    xbrl_json_2019['StatementsOfIncome']['RevenueFromContractWithCustomerExcludingAssessedTax']
    + xbrl_json_2020['StatementsOfIncome']['RevenueFromContractWithCustomerExcludingAssessedTax']
    + xbrl_json_2021['StatementsOfIncome']['RevenueFromContractWithCustomerExcludingAssessedTax']
    + xbrl_json_2022['StatementsOfIncome']['RevenueFromContractWithCustomerExcludingAssessedTax']
)

# flatten nested fields into dotted column names such as 'period.endDate'
all_revenues = pd.json_normalize(all_revenues_json)

# drop facts that are identical across filings
all_revenues.drop_duplicates(inplace=True)

# convert the 'value' column to a numeric type
# (explicitly 64-bit: the revenue figures do not fit into a 32-bit integer)
all_revenues['value'] = all_revenues['value'].astype('int64')

# one boolean mask per product segment
mask_iphone = all_revenues['segment.value'] == 'aapl:IPhoneMember'
mask_ipad = all_revenues['segment.value'] == 'aapl:IPadMember'
mask_mac = all_revenues['segment.value'] == 'aapl:MacMember'
mask_wearables = all_revenues['segment.value'] == 'aapl:WearablesHomeandAccessoriesMember'

# keep only the facts that belong to one of the four product segments
revenue_product = all_revenues[(mask_iphone | mask_ipad | mask_mac | mask_wearables)]

# pivot the dataframe to create a new dataframe with period.endDate as the index,
# segment.value as the columns, and value as the values
revenue_product_pivot = revenue_product.pivot(index='period.endDate', columns='segment.value', values='value')

print("Apple's revenues by product from 2017 to 2022")
print('---------------------------------------------')
revenue_product_pivot

Apple's revenues by product from 2017 to 2022
---------------------------------------------

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# plot the stacked bar chart: one bar per period, one layer per product
ax = revenue_product_pivot.plot(kind='bar', stacked=True, figsize=(8, 6))

# rotate the x-axis labels by 0 degrees
plt.xticks(rotation=0)

# set the title and labels for the chart
ax.set_title("Apple's Revenue by Product Category", fontsize=16, fontweight='bold')
ax.set_xlabel('Period End Date', fontsize=12)
ax.set_ylabel('Revenue (USD)', fontsize=12)

# add gridlines
ax.grid(axis='y', linestyle='--', alpha=0.3)

# remove the top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# format y-axis ticks to show values in billions of dollars
formatter = ticker.FuncFormatter(lambda x, pos: '$%1.0fB' % (x*1e-9))
ax.yaxis.set_major_formatter(formatter)

# map the original labels to new labels
label_map = {
    'aapl:IPhoneMember': 'iPhone',
    'aapl:MacMember': 'Mac',
    'aapl:IPadMember': 'iPad',
    'aapl:WearablesHomeandAccessoriesMember': 'Wearables & Home',
}

# one readable label per plotted column, in the order the columns are drawn
new_labels = [label_map[label] for label in revenue_product_pivot.columns]
handles, _ = ax.get_legend_handles_labels()

# build the legend once, with entries reversed and all styling applied here;
# a second legend call would replace the first and discard title and placement
ax.legend(
    handles=handles[::-1],
    labels=new_labels[::-1],
    title='Product Category',
    loc='upper left',
    fontsize='small',
    title_fontsize=10,
)

# add the values in billions of dollars to each part of the bar
for p in ax.containers:
    ax.bar_label(p, labels=['%.1f' % (v/1e9) for v in p.datavalues],
                 label_type='center', fontsize=8)

plt.show()

# one boolean mask per geographic segment
mask_america = all_revenues['segment.value'] == 'aapl:AmericasSegmentMember'
mask_europe = all_revenues['segment.value'] == 'aapl:EuropeSegmentMember'
mask_china = all_revenues['segment.value'] == 'aapl:GreaterChinaSegmentMember'
mask_japan = all_revenues['segment.value'] == 'aapl:JapanSegmentMember'
mask_asia_rest = all_revenues['segment.value'] == 'aapl:RestOfAsiaPacificSegmentMember'

# keep only the facts that belong to one of the five regions
revenue_geo = all_revenues[(mask_america | mask_europe | mask_china | mask_japan | mask_asia_rest)]

# pivot the dataframe to create a new dataframe with period.endDate as the index,
# segment.value as the columns, and value as the values
revenue_geo_pivot = revenue_geo.pivot(index='period.endDate', columns='segment.value', values='value')

print("Apple's revenues by region from 2017 to 2022")
print('---------------------------------------------')
revenue_geo_pivot

Apple's revenues by region from 2017 to 2022
---------------------------------------------

# plot the stacked bar chart: one bar per period, one layer per region
ax = revenue_geo_pivot.plot(kind='bar', stacked=True, figsize=(8, 6))

# rotate the x-axis labels by 0 degrees
plt.xticks(rotation=0)

# set the title and labels for the chart
ax.set_title("Apple's Revenue by Region", fontsize=16, fontweight='bold')
ax.set_xlabel('Period End Date', fontsize=12)
ax.set_ylabel('Revenue (USD)', fontsize=12)

# add gridlines
ax.grid(axis='y', linestyle='--', alpha=0.3)

# remove the top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# format y-axis ticks to show values in billions of dollars
formatter = ticker.FuncFormatter(lambda x, pos: '$%1.0fB' % (x*1e-9))
ax.yaxis.set_major_formatter(formatter)

# map the original labels to new labels
label_map = {
    'aapl:AmericasSegmentMember': 'America',
    'aapl:EuropeSegmentMember': 'Europe',
    'aapl:GreaterChinaSegmentMember': 'China',
    'aapl:JapanSegmentMember': 'Japan',
    'aapl:RestOfAsiaPacificSegmentMember': 'Asia Rest',
}

# one readable label per plotted column, in the order the columns are drawn
new_labels = [label_map[label] for label in revenue_geo_pivot.columns]
handles, _ = ax.get_legend_handles_labels()

# build the legend once, with entries reversed and all styling applied here;
# a second legend call would replace the first and discard title and placement
ax.legend(
    handles=handles[::-1],
    labels=new_labels[::-1],
    title='Region',
    loc='upper left',
    fontsize='small',
    title_fontsize=10,
)

# add the values in billions of dollars to each part of the bar
for p in ax.containers:
    ax.bar_label(p, labels=['%.1f' % (v/1e9) for v in p.datavalues],
                 label_type='center', fontsize=8)

plt.show()

Financial Statement	XBRL-JSON Counterpart
Income statement	`xbrl_json['StatementsOfIncome']`
Balance sheet	`xbrl_json['BalanceSheets']`
Cash flow statement	`xbrl_json['StatementsOfCashFlows']`

	2021-09-26-2022-09-24	2020-09-27-2021-09-25	2019-09-29-2020-09-26
RevenueFromContractWithCustomerExcludingAssessedTax	394328000000	365817000000	274515000000
CostOfGoodsAndServicesSold	223546000000	212981000000	169559000000
GrossProfit	170782000000	152836000000	104956000000
ResearchAndDevelopmentExpense	26251000000	21914000000	18752000000
SellingGeneralAndAdministrativeExpense	25094000000	21973000000	19916000000
OperatingExpenses	51345000000	43887000000	38668000000
OperatingIncomeLoss	119437000000	108949000000	66288000000
NonoperatingIncomeExpense	-334000000	258000000	803000000
IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest	119103000000	109207000000	67091000000
IncomeTaxExpenseBenefit	19300000000	14527000000	9680000000
NetIncomeLoss	99803000000	94680000000	57411000000
EarningsPerShareBasic	6.15	5.67	3.31
EarningsPerShareDiluted	6.11	5.61	3.28
WeightedAverageNumberOfSharesOutstandingBasic	16215963000	16701272000	17352119000
WeightedAverageNumberOfDilutedSharesOutstanding	16325819000	16864919000	17528214000

Human-Readable 10-K Filing	Counterpart in XBRL
Total net sales	RevenueFromContractWithCustomerExcludingAssessedTax
Total cost of sales	CostOfGoodsAndServicesSold
Gross margin	GrossProfit
Research and development	ResearchAndDevelopmentExpense
Selling, general and administrative	SellingGeneralAndAdministrativeExpense
Total operating expenses	OperatingExpenses
Operating income	OperatingIncomeLoss
Other income/(expense), net	NonoperatingIncomeExpense
Income before provision for income taxes	IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest
Provision for income taxes	IncomeTaxExpenseBenefit
Net income	NetIncomeLoss
Earnings per share: Basic	EarningsPerShareBasic
Earnings per share: Diluted	EarningsPerShareDiluted
Shares used in computing earnings per share: Basic	WeightedAverageNumberOfSharesOutstandingBasic
Shares used in computing earnings per share: Diluted	WeightedAverageNumberOfDilutedSharesOutstanding

	index	2016-09-25-2017-09-30	2017-10-01-2017-12-30	2017-10-01-2018-09-29	2017-12-31-2018-03-31	2018-04-01-2018-06-30	2018-07-01-2018-09-29	2018-09-30-2018-12-29	2018-09-30-2019-09-28	2018-12-30-2019-03-30	2019-03-31-2019-06-29	2019-06-30-2019-09-28	2019-09-29-2019-12-28	2019-09-29-2020-09-26	2019-12-29-2020-03-28	2020-03-29-2020-06-27	2020-06-28-2020-09-26	2020-09-27-2021-09-25	2021-09-26-2022-09-24
0	CostOfGoodsAndServicesSold	1.410480e+11	NaN	1.637560e+11	NaN	NaN	NaN	NaN	1.617820e+11	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	CostOfGoodsAndServicesSold	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.695590e+11	NaN	NaN	NaN	2.129810e+11	2.235460e+11
2	CostOfGoodsAndServicesSold	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.617820e+11	NaN	NaN	NaN	NaN	1.695590e+11	NaN	NaN	NaN	2.129810e+11	NaN
3	CostOfGoodsAndServicesSold	NaN	NaN	1.637560e+11	NaN	NaN	NaN	NaN	1.617820e+11	NaN	NaN	NaN	NaN	1.695590e+11	NaN	NaN	NaN	NaN	NaN
4	EarningsPerShareBasic	NaN	NaN	3.000000e+00	NaN	NaN	NaN	1.05	2.990000e+00	0.62	0.55	0.76	1.26	3.310000e+00	0.64	0.65	0.74	NaN	NaN
5	EarningsPerShareBasic	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.990000e+00	NaN	NaN	NaN	NaN	3.310000e+00	NaN	NaN	NaN	5.670000e+00	NaN
6	EarningsPerShareBasic	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	3.310000e+00	NaN	NaN	NaN	5.670000e+00	6.150000e+00
7	EarningsPerShareBasic	9.270000e+00	3.92	1.201000e+01	2.75	2.36	2.94	4.22	1.197000e+01	2.47	2.20	3.05	NaN	NaN	NaN	NaN	NaN	NaN	NaN
8	EarningsPerShareDiluted	9.210000e+00	3.89	1.191000e+01	2.73	2.34	2.91	4.18	1.189000e+01	2.46	2.18	3.03	NaN	NaN	NaN	NaN	NaN	NaN	NaN
9	EarningsPerShareDiluted	NaN	NaN	2.980000e+00	NaN	NaN	NaN	1.05	2.970000e+00	0.61	0.55	0.76	1.25	3.280000e+00	0.64	0.65	0.73	NaN	NaN

	2016-09-25-2017-09-30	2017-10-01-2018-09-29	2018-09-30-2019-09-28	2019-09-29-2020-09-26	2020-09-27-2021-09-25	2021-09-26-2022-09-24
RevenueFromContractWithCustomerExcludingAssessedTax	229234000000	265595000000	260174000000	274515000000	365817000000	394328000000
CostOfGoodsAndServicesSold	141048000000	163756000000	161782000000	169559000000	212981000000	223546000000
GrossProfit	88186000000	101839000000	98392000000	104956000000	152836000000	170782000000
ResearchAndDevelopmentExpense	11581000000	14236000000	16217000000	18752000000	21914000000	26251000000
SellingGeneralAndAdministrativeExpense	15261000000	16705000000	18245000000	19916000000	21973000000	25094000000
OperatingExpenses	26842000000	30941000000	34462000000	38668000000	43887000000	51345000000
OperatingIncomeLoss	61344000000	70898000000	63930000000	66288000000	108949000000	119437000000
NonoperatingIncomeExpense	2745000000	2005000000	1807000000	803000000	258000000	-334000000
IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest	64089000000	72903000000	65737000000	67091000000	109207000000	119103000000
IncomeTaxExpenseBenefit	15738000000	13372000000	10481000000	9680000000	14527000000	19300000000
NetIncomeLoss	48351000000	59531000000	55256000000	57411000000	94680000000	99803000000
EarningsPerShareBasic	9.27	12.01	11.97	3.31	5.67	6.15
EarningsPerShareDiluted	9.21	11.91	11.89	3.28	5.61	6.11
WeightedAverageNumberOfSharesOutstandingBasic	5217242000	19821510000	18471336000	17352119000	16701272000	16215963000
WeightedAverageNumberOfDilutedSharesOutstanding	5251692000	20000435000	18595651000	17528214000	16864919000	16325819000

Extract Financial Statements from SEC Filings and XBRL Data with Python

Overview of XBRL and the Converter API

XBRL Files of Apple's 10-K

Getting Started

Extract Income Statement from XBRL Data in 10-K Filing

Export Income Statement from SEC 10-K Filing to Excel

Extract and Merge Financial Statements from Multiple 10-K Filings

Extract and Visualize Revenue by Product Category per Year from 10-Ks

Extract and Visualize Revenue by Region per Year from 10-Ks

segment.value	aapl:IPadMember	aapl:IPhoneMember	aapl:MacMember	aapl:WearablesHomeandAccessoriesMember
period.endDate
2017-09-30	18802000000	139337000000	25569000000	12826000000
2018-09-29	18380000000	164888000000	25198000000	17381000000
2019-09-28	21280000000	142381000000	25740000000	24482000000
2020-09-26	23724000000	137781000000	28622000000	30620000000
2021-09-25	31862000000	191973000000	35190000000	38367000000
2022-09-24	29292000000	205489000000	40177000000	41241000000

segment.value	aapl:AmericasSegmentMember	aapl:EuropeSegmentMember	aapl:GreaterChinaSegmentMember	aapl:JapanSegmentMember	aapl:RestOfAsiaPacificSegmentMember
period.endDate
2017-09-30	96600000000	54938000000	44764000000	17733000000	15199000000
2018-09-29	112093000000	62420000000	51942000000	21733000000	17407000000
2019-09-28	116914000000	60288000000	43678000000	21506000000	17788000000
2020-09-26	124556000000	68640000000	40308000000	21418000000	19593000000
2021-09-25	153306000000	89307000000	68366000000	28482000000	26356000000
2022-09-24	169658000000	95118000000	74200000000	25977000000	29375000000