Download 10-K Filings of Russell 3000 Companies from SEC EDGAR
This Python tutorial demonstrates how to download all SEC 10-K filings of Russell 3000 companies from the EDGAR database.
The process involves creating a list of all company tickers currently included in the Russell 3000 index, using the Query API of the sec-api
Python package to find the URLs of all 10-K filings filed within a given timeframe (e.g., 2015 to 2022), and downloading all filings with the Render API, which the sec-api
package exposes as the RenderApi
class. We use pandarallel
to parallelize the downloading process for faster downloads.
The source code can also be used to download any other SEC EDGAR filing filtered by other criteria, such as 10-Qs of all S&P 500 companies. Additionally, the code avoids the SEC.gov limit of 10 requests per second by downloading the filings directly from the sec-api.io servers via the Render API, which enables us to download up to 40 filings per second.
To structure the downloaded HTML files of 10-K filings, we can develop a function that will save the files in a simple folder structure with the ticker symbol as the name of the parent folder. An example folder structure is shown below:
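filings/
├── AAPL/
│   └── aapl-20220924.htm
├── MSFT/
│   └── msft-20220630.htm
└── ...
(The ticker folders and file names shown are illustrative; the actual names depend on the filings saved by the download function developed later in this tutorial.)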
Create a List of Tickers of Russell 3000 Constituents
First, let's download the CSV file containing all recent holdings of the Russell 3000 index from ishares.com and save the content as russell-3000.csv
. The source file can be found by clicking the "Detailed Holdings and Analytics" link on the ishares.com website of the corresponding index. See the screenshot below for more details.
Please note that depending on when you are reading this tutorial, you may need to update the asOfDate
query parameter in the URL to obtain the most recent holdings. The date format should be YYYYMMDD
. For example:
txt
https://ishares.com/..ishares-russell-3000-etf..?..asOfDate=20221230
^------^
import requests
url = 'https://www.ishares.com/us/products/239714/ishares-russell-3000-etf/1467271812596.ajax?fileType=csv&fileName=IWV_holdings&dataType=fund&asOfDate=20221230'
response = requests.get(url)
with open('russell-3000.csv', 'wb') as f:
    f.write(response.content)
The CSV file containing the recent holdings of the Russell 3000 index needs to be cleaned before converting it into a pandas DataFrame. The first 10 lines of metadata have a different structure compared to the actual holding data, which starts at line 11. The file also contains a text block at the end that must be removed.
(Metadata, line 1 to 10)
(Text block after line 2611)
To clean the file, we locate the indices of the rows containing the non-breaking space character \xa0
, which appear as empty rows. These rows separate the metadata section at the beginning and the text block at the end from the actual holdings data. Using their indices, we can split the CSV file and extract the holdings data.
# cleaning CSV file
import csv

with open('russell-3000.csv', 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    rows = list(reader)

empty_row_indices = [i for i in range(len(rows)) if (len(rows[i]) == 0 or '\xa0' in rows[i])]
print('Empty rows:', empty_row_indices)

start = empty_row_indices[0] + 1
end = empty_row_indices[1]
cleaned_rows = rows[start:end]

with open('russell-3000-clean.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(cleaned_rows)
Empty rows: [8, 2621]
import pandas as pd
# load Russell 3000 holdings CSV into a dataframe
holdings = pd.read_csv('./russell-3000-clean.csv')
print('Russell 3000 Constituents')
holdings
Russell 3000 Constituents
| | Ticker | Name | Sector | Asset Class | Market Value | Weight (%) | Notional Value | Shares | Price | Location | Exchange | Currency | FX Rate | Market Currency | Accrual Date |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAPL | APPLE INC | Information Technology | Equity | 559,365,151.11 | 5.16 | 559,365,151.11 | 4,305,127.00 | 129.93 | United States | NASDAQ | USD | 1.0 | USD | - |
| 1 | MSFT | MICROSOFT CORP | Information Technology | Equity | 513,917,712.42 | 4.74 | 513,917,712.42 | 2,142,931.00 | 239.82 | United States | NASDAQ | USD | 1.0 | USD | - |
| 2 | AMZN | AMAZON COM INC | Consumer Discretionary | Equity | 213,823,596.00 | 1.97 | 213,823,596.00 | 2,545,519.00 | 84.00 | United States | NASDAQ | USD | 1.0 | USD | - |
| 3 | BRKB | BERKSHIRE HATHAWAY INC CLASS B | Financials | Equity | 159,603,687.60 | 1.47 | 159,603,687.60 | 516,684.00 | 308.90 | United States | New York Stock Exchange Inc. | USD | 1.0 | USD | - |
| 4 | GOOGL | ALPHABET INC CLASS A | Communication | Equity | 151,996,026.75 | 1.40 | 151,996,026.75 | 1,722,725.00 | 88.23 | United States | NASDAQ | USD | 1.0 | USD | - |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2606 | P5N994 | Petrocorp Inc Escrow | Energy | Equity | 0.01 | 0.00 | 0.01 | 1,248.00 | 0.00 | United States | Non-Nms Quotation Service (Nnqs) | USD | 1.0 | USD | - |
| 2607 | - | OMNIAB INC $12.50 VESTING Prvt | Health Care | Equity | 0.02 | 0.00 | 0.02 | 1,502.00 | 0.00 | United States | NO MARKET (E.G. UNLISTED) | USD | 1.0 | USD | - |
| 2608 | - | OMNIAB INC $15.00 VESTING Prvt | Health Care | Equity | 0.02 | 0.00 | 0.02 | 1,502.00 | 0.00 | United States | NO MARKET (E.G. UNLISTED) | USD | 1.0 | USD | - |
| 2609 | TFM | FRESH MARKET ESCROW | Consumer Staples | Equity | 0.11 | 0.00 | 0.11 | 10,847.00 | 0.00 | United States | NASDAQ | USD | 1.0 | USD | - |
| 2610 | MARGIN_USD | FUTURES USD MARGIN BALANCE | Cash and/or Derivatives | Cash Collateral and Margins | -75,205.00 | 0.00 | -75,205.00 | -75,205.00 | 100.00 | United States | - | USD | 1.0 | USD | Jan 01, 2000 |
2611 rows × 15 columns
Create a List of All 10-K URLs on SEC EDGAR
Next, we create a list of URLs for all 10-K filings associated with each ticker listed in the holdings DataFrame. The Query API helps us achieve this by allowing us to filter the EDGAR database by any filing metadata field, such as form type, filer ticker, and filing date.
We also save additional metadata, such as the company ticker and filing date, for each URL. To filter for filings, we use the Lucene search syntax.
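For illustration, a Lucene-style query string for the 10-K filings of two tickers filed in 2021 might look like the following sketch (hypothetical two-ticker example); the same pattern is assembled programmatically in the download function below.
# illustrative query string following the pattern used later in download_10K_metadata()
query_string = (
    'ticker:(AAPL, MSFT) AND '
    'filedAt:[2021-01-01 TO 2021-12-31] AND '
    'formType:"10-K" AND NOT formType:"10-K/A" AND NOT formType:NT'
)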
API_KEY = 'YOUR_API_KEY'
!pip install -q sec-api
A company publishes one 10-K report per year, and the Query API can return up to 50 search results per search request. For this tutorial, we ignore all Form 10-K/A and NT 10-K filings, which represent amended filings and notifications of late filing, respectively.
To handle the large number of tickers, we divide them into batches with a maximum length of 25. We iterate over the range of years we are interested in, such as 2020 to 2022, and download all 10-K URLs per batch.
For instance, we begin with the year 2020 and download the URLs for the first 25 tickers, followed by the next 25 tickers, and so on until we have downloaded all URLs for 2020. We repeat this process for the next year and continue until we have downloaded all URLs for each year.
# create batches of tickers: [[A,B,C], [D,E,F], ...]
# a single batch has a maximum of max_length_of_batch tickers
def create_batches(tickers=[], max_length_of_batch=25):
    batches = [[]]

    for ticker in tickers:
        if len(batches[len(batches)-1]) == max_length_of_batch:
            batches.append([])
        batches[len(batches)-1].append(ticker)

    return batches
batches = create_batches(list(holdings['Ticker']))
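As a quick illustration of the batching behavior, here is a minimal example with hypothetical tickers and a batch size of two:
# minimal example: five tickers split into batches of at most two
example_batches = create_batches(['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'TSLA'], max_length_of_batch=2)
print(example_batches)
# [['AAPL', 'MSFT'], ['AMZN', 'GOOGL'], ['TSLA']]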
We define the function download_10K_metadata(tickers, start_year, end_year)
to download the URLs of the 10-K filings of all companies listed in tickers
within the date range specified by start_year
and end_year
.
The function saves the complete list of URLs to the metadata.csv
file on our local disk. Once the file exists, uncommenting the caching block at the top of the function lets us re-run download_10K_metadata()
without re-downloading all URLs.
from sec_api import QueryApi, RenderApi
from pathlib import Path
import multiprocessing
queryApi = QueryApi(api_key=API_KEY)
def download_10K_metadata(tickers=[], start_year=2021, end_year=2022):
    # if Path('metadata.csv').is_file():
    #     print('✅ Reading metadata from metadata.csv')
    #     result = pd.read_csv('metadata.csv')
    #     return result

    print('✅ Starting downloading metadata for years {} to {}'.format(start_year, end_year))

    # create ticker batches, with 25 tickers per batch
    batches = create_batches(tickers)
    frames = []

    for year in range(start_year, end_year + 1):
        for batch in batches:
            tickers_joined = ', '.join(batch)
            ticker_query = f'ticker:({tickers_joined})'
            filed_at_query = f'filedAt:[{year}-01-01 TO {year}-12-31]'
            form_type_query = 'formType:"10-K" AND NOT formType:"10-K/A" AND NOT formType:NT'

            query_string = ticker_query + ' AND ' + filed_at_query + ' AND ' + form_type_query

            query = {
                "query": query_string,
                "from": "0",
                "size": "50",
                "sort": [{"filedAt": {"order": "desc"}}]
            }

            response = queryApi.get_filings(query)
            filings = response['filings']

            metadata = list(map(lambda f: {'ticker': f['ticker'],
                                           'cik': f['cik'],
                                           'formType': f['formType'],
                                           'filedAt': f['filedAt'],
                                           'filingUrl': f['linkToFilingDetails']}, filings))

            df = pd.DataFrame.from_records(metadata)
            frames.append(df)

        print('✅ Downloaded metadata for year', year)

    result = pd.concat(frames)
    result.to_csv('metadata.csv', index=False)

    number_metadata_downloaded = len(result)
    print('✅ Download completed. Metadata downloaded for {} filings.'.format(number_metadata_downloaded))

    return result
tickers = list(holdings['Ticker'])
metadata = download_10K_metadata(tickers=tickers, start_year=2021, end_year=2022)
✅ Starting downloading metadata for years 2021 to 2022
✅ Downloaded metadata for year 2021
✅ Downloaded metadata for year 2022
✅ Download completed. Metadata downloaded for 4861 filings.
print('SEC form types:', list(metadata.formType.unique()))
print('Number of 10-K URLs:', len(metadata))
print('Metadata of the first 10 filings:')
metadata.head(10)
SEC form types: ['10-K']
Number of 10-K URLs: 4861
Metadata of the first 10 filings:
| | ticker | cik | formType | filedAt | filingUrl |
|---|---|---|---|---|---|
| 0 | AVGO | 1730168 | 10-K | 2021-12-17T16:42:51-05:00 | https://www.sec.gov/Archives/edgar/data/173016... |
| 1 | AMAT | 6951 | 10-K | 2021-12-17T16:14:51-05:00 | https://www.sec.gov/Archives/edgar/data/6951/0... |
| 2 | DE | 315189 | 10-K | 2021-12-16T11:39:34-05:00 | https://www.sec.gov/Archives/edgar/data/315189... |
| 3 | ADI | 6281 | 10-K | 2021-12-03T16:02:52-05:00 | https://www.sec.gov/Archives/edgar/data/6281/0... |
| 4 | DIS | 1744489 | 10-K | 2021-11-24T16:34:25-05:00 | https://www.sec.gov/Archives/edgar/data/174448... |
| 5 | SBUX | 829224 | 10-K | 2021-11-19T16:46:02-05:00 | https://www.sec.gov/Archives/edgar/data/829224... |
| 6 | V | 1403161 | 10-K | 2021-11-18T16:06:50-05:00 | https://www.sec.gov/Archives/edgar/data/140316... |
| 7 | QCOM | 804328 | 10-K | 2021-11-03T16:06:41-04:00 | https://www.sec.gov/Archives/edgar/data/804328... |
| 8 | AAPL | 320193 | 10-K | 2021-10-28T18:04:28-04:00 | https://www.sec.gov/Archives/edgar/data/320193... |
| 9 | ACN | 1467373 | 10-K | 2021-10-15T06:54:29-04:00 | https://www.sec.gov/Archives/edgar/data/146737... |
Download All 10-K Filings
Let's continue with developing the code responsible for downloading the actual filing content. The function download_filing(metadata)
creates a new folder for a ticker and downloads the HTML version of the 10-K filing into the newly created folder.
The function also removes the iXBRL inline reader parameter from the filing URL using the .replace('ix?doc=/', '')
method. For example:
Original URL:
https://www.sec.gov/ix?doc=/Archives/edgar/data/1045....0222.htm
^------^ to be removed
Cleaned URL:
https://www.sec.gov/Archives/edgar/data/1045....0222.htm
import os

renderApi = RenderApi(api_key=API_KEY)

def download_filing(metadata):
    try:
        ticker = metadata['ticker']
        new_folder = "./filings/" + ticker

        if not os.path.isdir(new_folder):
            os.makedirs(new_folder)

        url = metadata['filingUrl'].replace('ix?doc=/', '')
        file_content = renderApi.get_filing(url)
        file_name = url.split("/")[-1]

        with open(new_folder + "/" + file_name, "w") as f:
            f.write(file_content)
    except:
        print('❌ {ticker}: download failed: {url}'.format(ticker=ticker, url=url))
# download a sample filing
download_filing(metadata.iloc[0])
print('✅ Sample 10-K filing downloaded for {}'.format(metadata.iloc[0]['ticker']))
✅ Sample 10-K filing downloaded for AVGO
To fully utilize the Render API and reduce the time needed to download all filings, we parallelize the download process using pandarallel
, a library that applies a function in parallel across the rows or columns of a pandas DataFrame. With four workers running in parallel, we can download four filings simultaneously.
For instance, downloading all 4,861 filings one after another in a for
loop would take around 45 minutes, based on an average time of 550 ms per filing. With four workers running in parallel, the same download completes in about 10 minutes.
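As a rough sanity check of these numbers (assuming the 550 ms average per filing stated above):
# back-of-the-envelope runtime estimate, assuming ~550 ms per filing
filings_count = 4861
sequential_minutes = filings_count * 0.55 / 60   # ≈ 45 minutes in a sequential loop
parallel_minutes = sequential_minutes / 4        # ≈ 11 minutes with 4 parallel workers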
The figure below highlights the difference between downloading one filing at a time versus downloading four filings in parallel.
!pip install -q pandarallel
from pandarallel import pandarallel
number_of_workers = 4
pandarallel.initialize(progress_bar=True, nb_workers=number_of_workers, verbose=0)
# run a quick sample and download the first 50 filings
sample = metadata.head(50)
sample.parallel_apply(download_filing, axis=1)

# uncomment to download all filings
# metadata.parallel_apply(download_filing, axis=1)
print('✅ Download completed')
✅ Download completed