API_KEY = "YOUR_API_KEY"

!pip install -q sec-api

from sec_api import QueryApiqueryApi = QueryApi(api_key=API_KEY)

def flatten_lists(lists):  return [element for sublist in lists for element in sublist]# extract .xml files living in documentFormatFiles listdef extract_xml_urls(filing):  if "documentFormatFiles" in filing and len(filing["documentFormatFiles"]):    urls = list(map(lambda file: file["documentUrl"], filing["documentFormatFiles"]))    xml_urls = list(filter(lambda url: url.endswith(".xml") or url.endswith(".XML"), urls))    return xml_urls  else:    return []def get_all_xml_urls(year = 2022):  form_type = "ABS-EE"  lucene_query = "formType:"{form_type}" AND filedAt:[{year}-01-01 TO {year}-12-31]".format(form_type=form_type, year=year)  xml_urls = []  query_from = 0  while query_from < 10000:    query = {      "query": lucene_query,      "from": query_from,      "size": "50",      "sort": [{ "filedAt": { "order": "desc" } }]    }    response = queryApi.get_filings(query)    if len(response["filings"]) == 0:      break    new_xml_urls = list(map(extract_xml_urls, response["filings"]))    xml_urls = xml_urls + flatten_lists(new_xml_urls)    query_from += 50    # break  return xml_urls

xml_urls = get_all_xml_urls(year=2022)

xml_urls[:10]

['https://www.sec.gov/Archives/edgar/data/1731627/000188852422016521/exh_102.xml', 'https://www.sec.gov/Archives/edgar/data/1731627/000188852422016521/exh_103.xml', 'https://www.sec.gov/Archives/edgar/data/1848411/000188852422016508/exh_102.xml', 'https://www.sec.gov/Archives/edgar/data/1848411/000188852422016508/exh_103.xml', 'https://www.sec.gov/Archives/edgar/data/1742383/000188852422016502/exh_102.xml', 'https://www.sec.gov/Archives/edgar/data/1742383/000188852422016502/exh_103.xml', 'https://www.sec.gov/Archives/edgar/data/1784958/000188852422016499/exh_102.xml', 'https://www.sec.gov/Archives/edgar/data/1784958/000188852422016499/exh_103.xml', 'https://www.sec.gov/Archives/edgar/data/1816861/000188852422016490/exh_102.xml', 'https://www.sec.gov/Archives/edgar/data/1816861/000188852422016490/exh_103.xml']

print("{} XML URLs extracted".format(len(xml_urls)))

14131 XML URLs extracted

# write list of URLs to xml_urls.txt file on local diskwith open(r'xml_urls.txt', "w") as f:  f.writelines(xml_urls)

from sec_api import RenderApirenderApi = RenderApi(api_key=API_KEY)

# https://www.sec.gov/Archives/edgar/data/1731627/000188852422016521/exh_102.xmlurl = xml_urls[0]content = renderApi.get_filing(url)

print("Content snippet of an XML file")print("______________________________")print(content[0:7400])

Content snippet of an XML file______________________________<?xml version="1.0" encoding="us-ascii"?><assetData xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sec.gov/edgar/document/absee/cmbs/assetdata"> <assets>  <assetTypeNumber>Prospectus Loan ID</assetTypeNumber>  <assetNumber>1</assetNumber>  <reportingPeriodBeginningDate>11-15-2022</reportingPeriodBeginningDate>  <reportingPeriodEndDate>12-12-2022</reportingPeriodEndDate>  <originatorName>Wells Fargo Bank, National Association</originatorName>  <originationDate>02-28-2018</originationDate>  <originalLoanAmount>66666666</originalLoanAmount>  <originalTermLoanNumber>120</originalTermLoanNumber>  <maturityDate>03-06-2028</maturityDate>  <originalAmortizationTermNumber>360</originalAmortizationTermNumber>  <originalInterestRatePercentage>0.043985</originalInterestRatePercentage>  <interestRateSecuritizationPercentage>0.043985</interestRateSecuritizationPercentage>  <interestAccrualMethodCode>3</interestAccrualMethodCode>  <originalInterestRateTypeCode>1</originalInterestRateTypeCode>  <firstLoanPaymentDueDate>04-06-2018</firstLoanPaymentDueDate>  <underwritingIndicator>false</underwritingIndicator>  <lienPositionSecuritizationCode>1</lienPositionSecuritizationCode>  <loanStructureCode>A1</loanStructureCode>  <paymentTypeCode>2</paymentTypeCode>  <periodicPrincipalAndInterestPaymentSecuritizationAmount>325892.11</periodicPrincipalAndInterestPaymentSecuritizationAmount>  <scheduledPrincipalBalanceSecuritizationAmount>66593280.37</scheduledPrincipalBalanceSecuritizationAmount>  <paymentFrequencyCode>1</paymentFrequencyCode>  <NumberPropertiesSecuritization>1</NumberPropertiesSecuritization>  <NumberProperties>1</NumberProperties>  <graceDaysAllowedNumber>0</graceDaysAllowedNumber>  <interestOnlyIndicator>false</interestOnlyIndicator>  <balloonIndicator>true</balloonIndicator>  <prepaymentPremiumIndicator>false</prepaymentPremiumIndicator>  <modifiedIndicator>false</modifiedIndicator>  <prepaymentLockOutEndDate>12-05-2027</prepaymentLockOutEndDate>  <property>   <propertyName>TWELVE OAKS MALL</propertyName>   <propertyAddress>27500 NOVI ROAD</propertyAddress>   <propertyCity>Novi</propertyCity>   <propertyState>MI</propertyState>   <propertyZip>48377</propertyZip>   <propertyCounty>Oakland</propertyCounty>   <propertyTypeCode>RT</propertyTypeCode>   <netRentableSquareFeetNumber>716771</netRentableSquareFeetNumber>   <netRentableSquareFeetSecuritizationNumber>716771</netRentableSquareFeetSecuritizationNumber>   <yearBuiltNumber>1977</yearBuiltNumber>   <yearLastRenovated>2007</yearLastRenovated>   <valuationSecuritizationAmount>552900000</valuationSecuritizationAmount>   <valuationSourceSecuritizationCode>MAI</valuationSourceSecuritizationCode>   <valuationSecuritizationDate>12-14-2017</valuationSecuritizationDate>   <physicalOccupancySecuritizationPercentage>0.92</physicalOccupancySecuritizationPercentage>   <mostRecentPhysicalOccupancyPercentage>0.95</mostRecentPhysicalOccupancyPercentage>   <propertyStatusCode>6</propertyStatusCode>   <defeasanceOptionStartDate>05-06-2020</defeasanceOptionStartDate>   <DefeasedStatusCode>N</DefeasedStatusCode>   <largestTenant>Nordstrom</largestTenant>   <squareFeetLargestTenantNumber>160000</squareFeetLargestTenantNumber>   <leaseExpirationLargestTenantDate>12-31-2022</leaseExpirationLargestTenantDate>   <secondLargestTenant>Crate &amp; Barrel</secondLargestTenant>   <squareFeetSecondLargestTenantNumber>28144</squareFeetSecondLargestTenantNumber>   <leaseExpirationSecondLargestTenantDate>01-31-2031</leaseExpirationSecondLargestTenantDate>   <thirdLargestTenant>Hollister Clothing Stores</thirdLargestTenant>   <squareFeetThirdLargestTenantNumber>24440</squareFeetThirdLargestTenantNumber>   <leaseExpirationThirdLargestTenantDate>01-31-2029</leaseExpirationThirdLargestTenantDate>   <financialsSecuritizationDate>11-30-2017</financialsSecuritizationDate>   <mostRecentFinancialsStartDate>01-01-2022</mostRecentFinancialsStartDate>   <mostRecentFinancialsEndDate>09-30-2022</mostRecentFinancialsEndDate>   <revenueSecuritizationAmount>42899192</revenueSecuritizationAmount>   <mostRecentRevenueAmount>29289986</mostRecentRevenueAmount>   <operatingExpensesSecuritizationAmount>12304366</operatingExpensesSecuritizationAmount>   <operatingExpensesAmount>8089705.9</operatingExpensesAmount>   <netOperatingIncomeSecuritizationAmount>30594826</netOperatingIncomeSecuritizationAmount>   <mostRecentNetOperatingIncomeAmount>21200280.1</mostRecentNetOperatingIncomeAmount>   <netCashFlowFlowSecuritizationAmount>30011109</netCashFlowFlowSecuritizationAmount>   <mostRecentNetCashFlowAmount>20762492.35</mostRecentNetCashFlowAmount>   <netOperatingIncomeNetCashFlowSecuritizationCode>UW</netOperatingIncomeNetCashFlowSecuritizationCode>   <netOperatingIncomeNetCashFlowCode>CREFC</netOperatingIncomeNetCashFlowCode>   <mostRecentDebtServiceAmount>8816607.75</mostRecentDebtServiceAmount>   <debtServiceCoverageNetOperatingIncomeSecuritizationPercentage>2.6</debtServiceCoverageNetOperatingIncomeSecuritizationPercentage>   <mostRecentDebtServiceCoverageNetOperatingIncomePercentage>2.4045</mostRecentDebtServiceCoverageNetOperatingIncomePercentage>   <debtServiceCoverageNetCashFlowSecuritizationPercentage>2.55</debtServiceCoverageNetCashFlowSecuritizationPercentage>   <mostRecentDebtServiceCoverageNetCashFlowpercentage>2.3549</mostRecentDebtServiceCoverageNetCashFlowpercentage>   <debtServiceCoverageSecuritizationCode>F</debtServiceCoverageSecuritizationCode>   <mostRecentDebtServiceCoverageCode>F</mostRecentDebtServiceCoverageCode>   <mostRecentAnnualLeaseRolloverReviewDate>09-30-2022</mostRecentAnnualLeaseRolloverReviewDate>  </property>  <assetAddedIndicator>false</assetAddedIndicator>  <reportPeriodModificationIndicator>false</reportPeriodModificationIndicator>  <reportPeriodBeginningScheduleLoanBalanceAmount>61755756.56</reportPeriodBeginningScheduleLoanBalanceAmount>  <totalScheduledPrincipalInterestDueAmount>328570</totalScheduledPrincipalInterestDueAmount>  <reportPeriodInterestRatePercentage>0.043985</reportPeriodInterestRatePercentage>  <servicerTrusteeFeeRatePercentage>0.0002371</servicerTrusteeFeeRatePercentage>  <scheduledInterestAmount>226360.58</scheduledInterestAmount>  <scheduledPrincipalAmount>102209.42</scheduledPrincipalAmount>  <unscheduledPrincipalCollectedAmount>0</unscheduledPrincipalCollectedAmount>  <reportPeriodEndActualBalanceAmount>61653547.14</reportPeriodEndActualBalanceAmount>  <reportPeriodEndScheduledLoanBalanceAmount>61653547.14</reportPeriodEndScheduledLoanBalanceAmount>  <paidThroughDate>12-06-2022</paidThroughDate>  <servicingAdvanceMethodCode>1</servicingAdvanceMethodCode>  <nonRecoverabilityIndicator>false</nonRecoverabilityIndicator>  <totalPrincipalInterestAdvancedOutstandingAmount>0</totalPrincipalInterestAdvancedOutstandingAmount>  <totalTaxesInsuranceAdvancesOutstandingAmount>0</totalTaxesInsuranceAdvancesOutstandingAmount>  <otherExpensesAdvancedOutstandingAmount>0</otherExpensesAdvancedOutstandingAmount>  <paymentStatusLoanCode>0</paymentStatusLoanCode>  <nextInterestRatePercentage>0</nextInterestRatePercentage>  <primaryServicerName>Wells Fargo Bank, NA</primaryServicerName>  <assetSubjectDemandIndicator>false</assetSubjectDemandIndicator> </assets> <assets>  <assetTypeNumber>Prospectus Loan ID</

import pandas as pdimport osdef download_file(url):  try:    if "url" in url:      url = url["url"]    content = renderApi.get_filing(url)    url_parts = url.split("/")    url_numeric_parts = list(filter(lambda part: part.isnumeric(), url_parts))    # URL: https://www.sec.gov/Archives/edgar/data/1731627/000188852422016521/exh_102.xml    # CIK: 1731627    # accession no: 000188852422016521    # original file name: exh_102.xml    cik = url_numeric_parts[0]    accession_no = url_numeric_parts[1]    original_file_name = url_parts[-1]    download_dir = "xml-files/" + cik    file_name = accession_no + "-" + original_file_name    download_path = download_dir + "/" + file_name    # create dir if it doesn't exist    if not os.path.exists(download_dir):      os.makedirs(download_dir)    with open(download_path, "w") as f:      f.write(content)  except Exception as e:    print(e)    print("❌ download failed: {url}".format(url=url))

download_file("https://www.sec.gov/Archives/edgar/data/1731627/000188852422016521/exh_102.xml")

xml_urls_df = pd.DataFrame(xml_urls, columns=["url"])xml_urls_df.head(10)

!pip install -q pandarallel

from pandarallel import pandarallelnumber_of_workers = 4pandarallel.initialize(progress_bar=True, nb_workers=number_of_workers, verbose=0)# run a quick test and download 40 XML filessample = xml_urls_df.head(40)sample.parallel_apply(download_file, axis=1)# uncomment to download all XML files# xml_urls_df.parallel_apply(download_file, axis=1)print("✅ Download completed")

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=10), Label(value='0 / 10'))), HBox…

✅ Download completed

Asset Data File Type	Description
ABS-AUTOLOAN	Asset pool comprising auto loans.
ABS-AUTOLEASE	Asset pool comprising auto leases.
ABS-DS	Asset pool comprising debt securities.
ABS-RMBS	Asset pool comprising residential mortgages.
ABS-CMBS	Asset pool comprising commercial mortgages.

	url
0	https://www.sec.gov/Archives/edgar/data/173162...
1	https://www.sec.gov/Archives/edgar/data/173162...
2	https://www.sec.gov/Archives/edgar/data/184841...
3	https://www.sec.gov/Archives/edgar/data/184841...
4	https://www.sec.gov/Archives/edgar/data/174238...
5	https://www.sec.gov/Archives/edgar/data/174238...
6	https://www.sec.gov/Archives/edgar/data/178495...
7	https://www.sec.gov/Archives/edgar/data/178495...
8	https://www.sec.gov/Archives/edgar/data/181686...
9	https://www.sec.gov/Archives/edgar/data/181686...

Download XML Files from SEC EDGAR Using Python

Locate URLs of All XML Files

Download All XML Files from EDGAR