import xml.etree.ElementTree as ET

import pandas as pd
import requests

from google.colab import files
# API URL and key
# SECURITY NOTE(review): this is a live-looking credential committed to
# source. Rotate the key and load it from an environment variable or a
# secret store instead of hardcoding it.
# NOTE(review): `api_url` is referenced by fetch_data() below but is never
# defined in the visible file -- as written the script raises NameError.
service_key = "kGUKTo8Mh/FFlsfrtw7HOGgfkKwZkXH8TNNzDcOdXpdg4I5RrbSu89yzFp9PLET6xzVDhwQU5VyRCUKIoG2YQg=="
# Function to fetch data from API
def fetch_data(year, page_no, num_of_rows):
    """Fetch one page of XML data from the API for the given base year.

    Args:
        year: Base year (sent as the ``bas_yy`` query parameter).
        page_no: 1-based page number to request.
        num_of_rows: Page size requested from the API.

    Returns:
        The raw XML response body as bytes.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If the server does not respond within 30s.

    NOTE(review): relies on a module-level ``api_url`` that is never
    defined in the visible file -- it must be assigned before calling.
    """
    params = {
        'ServiceKey': service_key,
        'type': 'xml',
        'pageNo': page_no,
        'numOfRows': num_of_rows,
        'bas_yy': year,
    }
    # Explicit timeout: requests has no default, so a stalled connection
    # would otherwise hang the whole crawl loop forever.
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()  # Check for request errors
    return response.content
# Parse XML and extract data
def parse_xml(xml_data):
    """Parse an API XML response into a list of per-row dicts.

    Args:
        xml_data: Raw XML document (bytes or str) as returned by the API.

    Returns:
        One dict per ``<row>`` element found anywhere in the document,
        mapping each child tag name to its text content (``None`` for
        empty elements). An empty list when no rows are present -- the
        caller uses that as its end-of-data signal.
    """
    root = ET.fromstring(xml_data)
    # Each <row> becomes a record of {child tag: child text}.
    return [{elem.tag: elem.text for elem in row}
            for row in root.findall('.//row')]
# Crawl every year/page combination, then export the result to Excel.
all_data = []
years = range(2010, 2020)  # Change range as needed
num_of_rows = 1000
for year in years:
    page_no = 1
    while True:
        page_records = parse_xml(fetch_data(year, page_no, num_of_rows))
        if not page_records:
            # An empty page means this year is exhausted.
            break
        all_data.extend(page_records)
        page_no += 1
# Build a DataFrame from the accumulated records and write it out.
df = pd.DataFrame(all_data)
excel_file = 'AdministrativeDistStatPopSize_all_years.xlsx'
df.to_excel(excel_file, index=False)
# Trigger a browser download of the file (Colab helper).
files.download(excel_file)
import pandas as pd
import requests
import xml.etree.ElementTree as ET
from google.colab import files
# API URL and key
# SECURITY NOTE(review): hardcoded credential committed to source; rotate
# it and load from an environment variable or secret store instead.
# NOTE(review): `api_url` is referenced by fetch_data() below but is never
# defined in the visible file -- as written the script raises NameError.
service_key = "kGUKTo8Mh/FFlsfrtw7HOGgfkKwZkXH8TNNzDcOdXpdg4I5RrbSu89yzFp9PLET6xzVDhwQU5VyRCUKIoG2YQg=="
# Column name mapping from English to Korean.
# BUG FIX: the "bas_yy" value was a string literal broken across two
# source lines (a SyntaxError); it is rejoined onto one line here.
# NOTE(review): the Korean values appear mojibake'd (mis-encoded bytes);
# they are preserved as-is -- re-encode from the original source to
# restore the intended Korean labels before relying on them.
column_mapping = {
    "totalCount": "์ ์ฒด ๊ฒฐ๊ณผ ์",
    "numOfRows": "ํ ํ์ด์ง๊ฒฐ๊ณผ ์",
    "pageNo": "ํ์ด์ง ๋ฒํธ",
    "type": "์์ ๋ฌธ์ํ์",
    "resultCode": "๊ฒฐ๊ณผ์ฝ๋",
    "resultMsg": "๊ฒฐ๊ณผ๋ฉ์ธ์ง",
    "bas_yy": "๊ธฐ์ค๋๋",
    "city_smry": "์ ๊ณ",
    "city_tths50_mor": "์ 50๋ง์ด์",
    "city_tths30_ut_tths50": "์ 30๋ง์ด์ 50๋ง๋ฏธ๋ง",
    "city_tths10_ut_tths30": "์ 10๋ง์ด์ 30๋ง๋ฏธ๋ง",
    "city_tths10_lss": "์ 10๋ง๋ฏธ๋ง",
    "cnti_smry": "๊ตฐ ๊ณ",
    "cnti_tths10_mor": "๊ตฐ 10๋ง์ด์",
    "cnti_tths5_ut_tths10": "๊ตฐ 5๋ง์ด์ 10๋ง๋ฏธ๋ง",
    "cnti_tths3_ut_tths5": "๊ตฐ 3๋ง์ด์ 5๋ง๋ฏธ๋ง",
    "cnti_tths3_lss": "๊ตฐ 3๋ง๋ฏธ๋ง",
    "atodstri_smry": "์์น๊ตฌ ๊ณ",
    "atodstri_tths50_mor": "์์น๊ตฌ 50๋ง์ด์",
    "atodstri_tths30_ut_tths50": "์์น๊ตฌ 30๋ง์ด์ 50๋ง๋ฏธ๋ง",
    "atodstri_tths30_lss": "์์น๊ตฌ 30๋ง๋ฏธ๋ง",
    "eup_smry": "์ ๊ณ",
    "eup_tths3_mor": "์ 3๋ง์ด์",
    "eup_tths2_ut_tths3": "์ 2๋ง์ด์ 3๋ง๋ฏธ๋ง",
    "eup_tths1_ut_tths2": "์ 1๋ง์ด์ 2๋ง๋ฏธ๋ง",
    "eup_tths1_lss": "์ 1๋ง๋ฏธ๋ง",
    "myeon_smry": "๋ฉด ๊ณ",
    "myeon_tths2_mor": "๋ฉด 2๋ง์ด์",
    "myeon_tths1_ut_tths2": "๋ฉด 1๋ง์ด์ 2๋ง๋ฏธ๋ง",
    "myeon_ths5_ut_tths1": "๋ฉด 5์ฒ์ด์ 1๋ง๋ฏธ๋ง",
    "myeon_ths5_lss": "๋ฉด 5์ฒ๋ฏธ๋ง",
    "dong_smry": "๋ ๊ณ",
    "dong_tths3_mor": "๋ 3๋ง์ด์",
    "dong_tths2_ut_tths3": "๋ 2๋ง์ด์ 3๋ง๋ฏธ๋ง",
    "dong_tths1_ut_tths2": "๋ 1๋ง์ด์ 2๋ง๋ฏธ๋ง",
    "dong_ths5_ut_tths1": "๋ 5์ฒ์ด์ 1๋ง๋ฏธ๋ง",
    "dong_ths5_lss": "๋ 5์ฒ๋ฏธ๋ง",
}
# Function to fetch data from API
def fetch_data(year, page_no, num_of_rows):
    """Fetch one page of XML data from the API for the given base year.

    Args:
        year: Base year (sent as the ``bas_yy`` query parameter).
        page_no: 1-based page number to request.
        num_of_rows: Page size requested from the API.

    Returns:
        The raw XML response body as bytes.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If the server does not respond within 30s.

    NOTE(review): relies on a module-level ``api_url`` that is never
    defined in the visible file -- it must be assigned before calling.
    """
    params = {
        'ServiceKey': service_key,
        'type': 'xml',
        'pageNo': page_no,
        'numOfRows': num_of_rows,
        'bas_yy': year,
    }
    # Explicit timeout: requests has no default, so a stalled connection
    # would otherwise hang the whole crawl loop forever.
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()  # Check for request errors
    return response.content
# Parse XML and extract data
def parse_xml(xml_data):
    """Extract one dict per ``<row>`` element from an XML response body.

    Each returned dict maps a row's child tag names to their text
    content; an empty list signals that the document held no rows.
    """
    document_root = ET.fromstring(xml_data)
    records = []
    for row_element in document_root.findall('.//row'):
        records.append({child.tag: child.text for child in row_element})
    return records
# Loop through years and pages to fetch all data
all_data = []
years = range(2010, 2020)  # Adjust the range as needed
num_of_rows = 1000
for year in years:
    page_no = 1
    while True:
        xml_data = fetch_data(year, page_no, num_of_rows)
        data = parse_xml(xml_data)
        if not data:
            break  # No more data to fetch (empty page ends this year)
        all_data.extend(data)
        page_no += 1
# Create DataFrame and rename columns to Korean
df = pd.DataFrame(all_data)
# Columns not present in column_mapping are left unchanged by rename.
df.rename(columns=column_mapping, inplace=True)
# Save to Excel
excel_file = 'AdministrativeDistStatPopSize_all_years_kr.xlsx'
df.to_excel(excel_file, index=False)
# Download the Excel file (Colab-only helper; triggers a browser download)
files.download(excel_file)