
Amend Code - Website Scraping Python
$30-250 AUD
Оплачивается при доставке
I am using PyCharm and looking for someone to amend the following code so that it can scrape the data from multiple URLs on the same website. The URLs are in the same/similar format. Then add in a formula (that divides two cells) and sort the grouped data in the exported CSV file (see attached file: Detailed Explanation on what I need the code to [login to view URL]). The one CSV is to include the data from all URLs (see attached file: Summary Spreadsheet csv [login to view URL]).
This is the code that requires amending.
# --- Selenium race-card scraping script (reconstructed) -----------------------
# NOTE(review): this snippet was lifted from a web page that redacted every URL
# and many attribute accesses to the placeholder "[login to view URL]", and the
# indentation was flattened.  The calls below (webdriver.Chrome, driver.get,
# time.sleep, list.append, pd.DataFrame, np.array, time.time, driver.close/quit)
# are reconstructed from context — confirm against the original project code.
# Requires (imported at the top of the original file):
#   import time
#   import numpy as np
#   import pandas as pd
#   from selenium import webdriver
#   from selenium.webdriver.chrome.options import Options

# Path to the ChromeDriver binary.  TODO: the real filename was redacted;
# point this at the actual driver executable.
chrome_driver_path = 'Drivers/chromedriver'

chrome_options = Options()
# chrome_options.add_argument('--incognito')
# chrome_options.add_argument("--headless")

# TODO: the real race-card URL was redacted in the scraped post.
url = "https://example.com/race"

driver = webdriver.Chrome(executable_path=chrome_driver_path, options=chrome_options)
driver.get(url)

# All runner-name cells on the page; clicking one reveals that runner's details.
runners = driver.find_elements_by_xpath("//div[@class='runner-name']")
time.sleep(3)  # crude wait for the page to finish rendering

# One [race_no, location, distance, type, wins, places] list per runner,
# appended in click order and later joined (by parallel index) onto that
# runner's form table rows.
arr_temp = []

for i in range(len(runners)):
    runners[i].click()
    time.sleep(10)  # crude wait for the runner detail panel to load

    race_no = driver.find_element_by_class_name("race-number").text
    location = driver.find_element_by_class_name("meeting-info-description").text
    # The ng-if expression was redacted in the scraped post — TODO confirm.
    distance = driver.find_element_by_xpath("//li[@ng-if='vm.race.distance']").text

    # The current-value binding strings were redacted — TODO confirm both.
    places = driver.find_elements_by_xpath("//*[@current-value='vm.places(runner)']")
    wins = driver.find_elements_by_xpath("//*[@current-value='vm.wins(runner)']")

    for idx_age in range(len(places)):
        try:
            values = driver.find_elements_by_class_name("form-details-list")[idx_age]
            key_vals = values.find_elements_by_tag_name("li")
            for idx_val in range(len(key_vals)):
                arr_check = []
                # Only the "Type" row of the key/value list carries the value
                # exported under the "Age" header below.
                if "Type" in key_vals[idx_val].find_element_by_class_name("key-span").text:
                    arr_check.append(race_no)
                    arr_check.append(location)
                    arr_check.append(distance)
                    arr_check.append(key_vals[idx_val].find_element_by_class_name("value-span").text)
                    arr_check.append(wins[idx_age].text)
                    arr_check.append(places[idx_age].text)
                    arr_temp.append(arr_check)
        except Exception as exc:  # was a bare `except:` that hid every failure
            print("Skip Index: " + str(idx_age) + " (" + str(exc) + ")")

# One "flexible-table" of past form per runner; flatten all of them into rows.
tables = driver.find_elements_by_class_name("flexible-table")
arr_save = []
header_array = []

for k in range(len(tables)):
    rows = tables[k].find_elements_by_class_name("flexible-row")
    for j in range(len(rows)):
        # Build the header once, from the first row of the first table.
        # (Original wrote `== 0 & j == 0`, which only worked by accident of
        # `&` binding tighter than `==`; `and` states the intent.)
        if len(header_array) == 0 and j == 0:
            headers = rows[j].find_elements_by_class_name("flexible-cell")
            for m in range(len(headers)):
                header_array.append(headers[m].text)
            # Extra columns appended from the per-runner summary (arr_temp).
            header_array.append("Race No.")
            header_array.append("Location")
            header_array.append("Distance")
            header_array.append("Age")
            header_array.append("Win")
            header_array.append("Places")
        if j > 0:
            arr_row = []
            columns = rows[j].find_elements_by_class_name("flexible-cell")
            for m in range(len(columns)):
                # Cell 12 is replaced by the runner's name — presumably the
                # runner column of the table; confirm against the live page.
                if m == 12:
                    arr_row.append(runners[k].text)
                else:
                    arr_row.append(columns[m].text)
            # Append the per-runner summary captured in the click loop above
            # (assumes exactly one table per runner, in the same order).
            for idx_temp in range(len(arr_temp[k])):
                arr_row.append(arr_temp[k][idx_temp])
            arr_save.append(arr_row)
    print("INDEX:" + str(k))

# Export everything as a single CSV named by the current epoch-milliseconds.
df2 = pd.DataFrame(np.array(arr_save), columns=header_array)
file_name = str(round(time.time() * 1000))
df2.to_csv(file_name + ".csv")

driver.close()
driver.quit()
ID проекта: #36671542