Kore'de Lise Kaç Yaşında Başlar?

Deniz

New member
from selenium.common.exceptions import WebDriverException

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.common.by import By

from bs4 import BeautifulSoup

import time

import os

from slugify import slugify

import re

import chardet

# Matches English abbreviated-month dates such as "Jan 5, 2023".
# Deliberately not anchored with \b: the input may be text nodes glued
# together without spaces (e.g. "...2023More items...").
_DATE_PATTERN = re.compile(
    r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{1,2}, \d{4}'
)


def deleteTarih(text: str) -> str:
    """Strip date stamps and the "More items..." marker from *text*.

    Removes every occurrence of an English abbreviated-month date of the
    form ``Mon D, YYYY`` (e.g. ``Jan 5, 2023``) and every literal
    ``More items...``, then trims leading and trailing whitespace.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string when nothing else remains.
    """
    text = _DATE_PATTERN.sub('', text)
    text = text.replace("More items...", "")
    return text.strip()

# Sample query string; not referenced anywhere else in the visible script.
text = "Arı kuşu"

# Output directory: one .txt file per scraped heading is written here.
txtFolder = "başlık_txtler"

print("----- Bot Başarıyla Başlatıldı -----")

# Start a headless Chrome session. ChromeDriverManager().install() provides
# the driver path handed to the Service.
options = webdriver.ChromeOptions()
options.add_argument("headless")
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
driver.maximize_window()

# Element lookups keep retrying for up to 30 seconds before raising.
driver.implicitly_wait(30)
# Explicit-wait helper with the same timeout; unused in the visible code.
wait = WebDriverWait(driver, 30)

driver.get("https://www.google.com/")

# exist_ok=True replaces the exists()-then-makedirs() pair, which can race
# with another process creating the directory in between.
os.makedirs(txtFolder, exist_ok=True)

# Detect the encoding of the query list from its raw bytes.
with open("yeni2.txt", "rb") as f:
    raw_data = f.read()

result = chardet.detect(raw_data)
# chardet reports None when it cannot identify the encoding (for example on
# an empty file); fall back to UTF-8 so the text-mode open() calls that use
# this value do not silently depend on the platform's locale encoding.
encoding = result['encoding'] or "utf-8"

print(f"Dosya kodlaması: {encoding}")

# Re-read the file as text; every line is treated as one search query.
with open("yeni2.txt", "r", encoding=encoding) as f:
    lines = f.readlines()

# Pause after submitting a search and after returning to the start page.
PAGE_DELAY_SECONDS = 22
# Pause after each click on a question box.
CLICK_DELAY_SECONDS = 2
# Number of question boxes to try to click on every results page.
QUESTIONS_TO_EXPAND = 4
# Answers with fewer whitespace-separated words than this are ignored.
MIN_ANSWER_WORDS = 8


def _next_in_chain(node, *steps):
    """Follow successive ``find_next`` lookups starting from *node*.

    Args:
        node: The BeautifulSoup element to start from.
        *steps: ``(tag_name, attrs)`` pairs, applied in order.

    Returns:
        The element reached after the last step, or ``None`` as soon as
        any step finds nothing.
    """
    for tag_name, attrs in steps:
        if node is None:
            return None
        node = node.find_next(tag_name, attrs)
    return node


try:
    for line in lines:
        query = line.strip()
        if not query:
            # A blank line would submit nothing and leave the browser on the
            # start page; the driver.back() below would then navigate away
            # from it and the next search-box lookup would fail.
            continue

        search_box = driver.find_element("name", "q")
        search_box.send_keys(query)
        search_box.send_keys(Keys.ENTER)
        time.sleep(PAGE_DELAY_SECONDS)

        # Click the first few question boxes on the results page.
        for position in range(1, QUESTIONS_TO_EXPAND + 1):
            try:
                # The parentheses matter: "(//div[...])[n]" is the n-th match
                # in the whole document, whereas "//div[...][n]" only matches
                # a div that is the n-th such child of its own parent.
                question = driver.find_element(
                    By.XPATH, f"(//div[@jsname='yEVEwb'])[{position}]"
                )
                question.click()
                time.sleep(CLICK_DELAY_SECONDS)
            except WebDriverException:
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")

        soup = BeautifulSoup(driver.page_source, "html.parser")

        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            header_node = _next_in_chain(
                link, ("div", {"jsaction": "AWEk5c"}), ("div", {}), ("span", {})
            )
            answer_node = _next_in_chain(
                link, ("div", {"jsname": "oQYOj"}), ("div", {})
            )
            if header_node is None or answer_node is None:
                # Markup differs from what is expected; skip this entry
                # instead of aborting the whole run.
                continue

            answer_text = answer_node.text
            if len(answer_text.split()) < MIN_ANSWER_WORDS:
                continue

            contentW = deleteTarih(answer_text)
            if contentW == "":
                # Nothing left after cleaning: fall back to the next sibling div.
                sibling = answer_node.find_next_sibling("div")
                if sibling is None:
                    continue
                contentW = deleteTarih(sibling.text)

            header = header_node.text
            print(header)

            # File name is the slugified heading with hyphens turned into spaces.
            file_name = slugify(header).replace('-', ' ')
            with open(os.path.join(txtFolder, file_name + ".txt"), 'w', encoding='utf-8') as out:
                out.write(header)
                out.write('\n')
                out.write(contentW)

        driver.back()
        time.sleep(PAGE_DELAY_SECONDS)

        # Remove the processed query from the input file (presumably so an
        # interrupted run can be restarted without repeating finished work).
        with open('yeni2.txt', 'r+', encoding=encoding) as f:
            remaining = [entry for entry in f.readlines() if entry.strip() != query]
            f.seek(0)
            f.writelines(remaining)
            f.truncate()
finally:
    # Always shut the browser down, even when the loop fails part-way;
    # otherwise the headless Chrome process is left running.
    driver.quit()

print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")