• Y
  • List All
  • Feedback
    • This Project
    • All Projects
Profile Account settings Log out
  • Favorite
  • Project
  • All
Loading...
  • Log in
  • Sign up
yjyoon / 웰라인_웹페이지_전력사용량_크롤링 star
  • Project homeH
  • CodeC
  • IssueI
  • Pull requestP
  • Review R
  • MilestoneM
  • BoardB
  • Files
  • Commit
  • Branches
웰라인_웹페이지_전력사용량_크롤링file_download_massive.py
Download as .zip file
File name
Commit message
Commit date
README.md
Hello Yona
2023-11-10
customer_list.csv
Hello Yona
2023-11-10
date_string.csv
Hello Yona
2023-11-10
file_download_massive.py
Hello Yona
2023-11-10
folder_creator.py
Hello Yona
2023-11-10
main.py
Hello Yona
2023-11-10
time_interval_list
Hello Yona
2023-11-10
time_interval_list.csv
Hello Yona
2023-11-10
juni 2023-11-10 97aed7c Hello Yona UNIX
Raw Open in browser Change history
"""Download wattage report files from the EIMS admin site with Selenium.

For one time-interval entry (chosen by index on the command line) the script
iterates over every customer in ``customer_list.csv`` and every date range in
``date_string.csv`` and triggers one browser download per combination.
"""

import argparse
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.service import service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

REPORT_URL = "https://admin.wls.eims.co.kr/WattageState/Report"

# NOTE(review): credentials are committed in source. They are kept as defaults
# so existing callers behave the same, but they should be moved to
# configuration or environment variables.
DEFAULT_USER_ID = "wl00002"
DEFAULT_PASSWORD = "2666##"


def download_files(company_name, time_interval, start_date, end_date, base_dir,
                   *, user_id=DEFAULT_USER_ID, password=DEFAULT_PASSWORD):
    """Log in to the report page and download one report file.

    A fresh Chrome session is started for every call and is always closed
    again, even when a lookup or click fails part-way through.

    Args:
        company_name: Visible text of the option to pick in the customer
            drop-down (``data-placeholder="참여고객명"``).
        time_interval: Visible text of the option to pick in the second
            drop-down (``data-placeholder="검침기종류"``). Also used as a
            sub-directory name for the download.
        start_date: Text typed into the ``from`` input box.
        end_date: Text typed into the ``to`` input box.
        base_dir: Download root. It is concatenated directly with
            ``time_interval``, so it must end with a path separator.
        user_id: Login id typed into the login form.
        password: Login password typed into the login form.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an expected page
            element is missing.
        selenium.common.exceptions.TimeoutException: If the download link does
            not get an ``href`` within 10 seconds.
    """
    options = Options()
    download_setting = {
        "download.default_directory": f"{base_dir}{time_interval}/{company_name}",
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
    }
    options.add_experimental_option("prefs", download_setting)
    options.add_argument('--disable-dev-shm-usage')

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    try:
        # The first visit shows the login form.
        driver.get(REPORT_URL)
        input_id = driver.find_element(By.CLASS_NAME, 'login-userid')
        input_password = driver.find_element(By.CLASS_NAME, 'login-password')
        login_button = driver.find_element(
            By.CSS_SELECTOR, "div.col-sm-7.text-right button"
        )

        input_id.clear()
        input_password.clear()
        input_id.send_keys(user_id)
        input_password.send_keys(password)
        login_button.click()

        # The login is paced by fixed sleeps. The original also built an
        # expected-condition object here but never waited on it, so it had no
        # effect and has been removed.
        time.sleep(2)
        driver.get(REPORT_URL)
        time.sleep(2)

        # ---------- gather the data from the website, by company,
        # time_interval, and date
        starting_date_input_box = driver.find_element(By.NAME, "from")
        end_date_input_box = driver.find_element(By.NAME, "to")
        starting_date_input_box.clear()
        end_date_input_box.clear()
        starting_date_input_box.send_keys(start_date)
        end_date_input_box.send_keys(end_date)

        drop_down_customer = driver.find_element(
            By.XPATH, '//select[@data-placeholder="참여고객명"]'
        )
        drop_down_time_interval = driver.find_element(
            By.XPATH, '//select[@data-placeholder="검침기종류"]'
        )
        Select(drop_down_customer).select_by_visible_text(company_name)
        Select(drop_down_time_interval).select_by_visible_text(time_interval)

        the_red_button = driver.find_element(
            By.XPATH,
            '//button[@ng-click="ctrl.getContractWattageStatistics(ctrl.req)"]',
        )
        the_red_button.click()

        the_download_button = driver.find_element(
            By.XPATH,
            '//a[@class="btn btn-default btn-outline pull-right btn-sm"]',
        )
        # Only click once the link has a non-empty href.
        WebDriverWait(driver, 10).until(
            lambda _: the_download_button.get_attribute('href')
        )
        the_download_button.click()

        # Give the browser time to finish the download before it is closed.
        time.sleep(10)
    finally:
        # Without this every call leaves a Chrome/chromedriver process behind.
        driver.quit()


def main():
    """Download reports for every customer and date range at one interval."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-t", "--time_interval", type=int, required=True,
        help="row index into time_interval_list.csv",
    )
    args = parser.parse_args()
    time_interval_index = args.time_interval

    df_c = pd.read_csv("customer_list.csv")
    df_t = pd.read_csv("time_interval_list.csv")
    # Use the first column of the chosen row, with spaces stripped.
    t = df_t.iloc[time_interval_index].values[0].replace(' ', '')
    df_dates = pd.read_csv("date_string.csv")

    base_dir = "/media/juni/T7 Shield/웰라인데이터/"
    for c in df_c.values:
        for dates in df_dates.values:
            download_files(c[0], t, dates[0], dates[1], base_dir)


if __name__ == "__main__":
    main()

          
        
    
    
Copyright Yona authors & © NAVER Corp. & NAVER LABS Supported by NAVER CLOUD PLATFORM

or
Sign in with github login with Google Sign in with Google
Reset password | Sign up