Deniz
New member
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import os
from slugify import slugify
import re
import chardet
def deleteTarih(text: str) -> str:
    """Remove date stamps and the "More items..." marker from *text*.

    A date stamp is an abbreviated English month name followed by a day and
    a four-digit year, e.g. ``Jan 5, 2023``. Every such stamp and every
    literal ``More items...`` is deleted; whatever surrounded them (including
    separators such as a dash) is left in place.

    Args:
        text: The raw snippet text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    text = re.sub(r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{1,2}, \d{4}', '', text)
    text = text.replace("More items...", "")
    return text.strip()
# Script body: for every line of yeni2.txt, run it as a Google search in a
# headless Chrome session, click up to four div[jsname='yEVEwb'] elements
# (presumably the expandable question entries on the results page -- confirm),
# then save each header/answer pair found in the page source as a text file
# inside txtFolder. Each processed line is removed from yeni2.txt.
text = "Arı kuşu"  # NOTE(review): not referenced anywhere in the visible script
txtFolder = "başlık_txtler"
print("----- Bot Başarıyla Başlatıldı -----")
options = webdriver.ChromeOptions()
options.add_argument("headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    driver.maximize_window()
    driver.implicitly_wait(30)
    wait = WebDriverWait(driver, 30)  # NOTE(review): created but never used below
    driver.get("https://www.google.com/")
    os.makedirs(txtFolder, exist_ok=True)

    # Detect the encoding of the query file before reading it as text.
    with open("yeni2.txt", "rb") as f:
        raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Dosya kodlaması: {encoding}")
    with open("yeni2.txt", "r", encoding=encoding) as f:
        lines = f.readlines()

    for line in lines:
        query = line.strip()
        if not query:
            # Blank line: there is nothing to search for.
            continue

        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(query)
        search_box.send_keys(Keys.ENTER)
        time.sleep(22)

        for i in range(4):
            try:
                # The parentheses matter: "(//div[...])[n]" is the n-th match in
                # the whole document, while "//div[...][n]" is the n-th match
                # among the children of each parent.
                question = driver.find_element(By.XPATH, f"(//div[@jsname='yEVEwb'])[{i + 1}]")
                question.click()
                time.sleep(2)
            except WebDriverException:
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")

        soup = BeautifulSoup(driver.page_source, "html.parser")
        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            try:
                header = link.find_all_next("div", {"jsaction": "AWEk5c"})[0].find_all_next("div")[0].find_all_next("span")[0].text
                content = link.find_all_next("div", {"jsname": "oQYOj"})[0].find_all_next("div")
            except IndexError:
                # The expected markup is missing for this entry; skip it
                # instead of aborting the whole run.
                continue

            # Only keep answers whose first div holds at least eight words.
            if not content or len(content[0].text.split()) < 8:
                continue

            print(header)
            contentW = deleteTarih(content[0].text)
            if contentW == "":
                # The first div held nothing but a date/marker; fall back to
                # its next sibling div, if there is one.
                siblings = content[0].find_next_siblings("div")
                if not siblings:
                    continue
                contentW = deleteTarih(siblings[0].text)

            file_name = slugify(header).replace('-', ' ')
            with open(os.path.join(txtFolder, file_name + ".txt"), 'w', encoding='utf-8') as f:
                f.write(header)
                f.write('\n')
                f.write(contentW)

        driver.back()
        time.sleep(22)

        # Rewrite yeni2.txt without the query that was just processed.
        with open('yeni2.txt', 'r+', encoding=encoding) as f:
            remaining = [row for row in f.readlines() if row.strip() != query]
            f.seek(0)
            f.writelines(remaining)
            f.truncate()

    print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")
finally:
    # Always shut the browser down, even when the run fails part-way.
    driver.quit()
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import os
from slugify import slugify
import re
import chardet
def deleteTarih(text: str) -> str:
    """Remove date stamps and the "More items..." marker from *text*.

    A date stamp is an abbreviated English month name followed by a day and
    a four-digit year, e.g. ``Jan 5, 2023``. Every such stamp and every
    literal ``More items...`` is deleted; whatever surrounded them (including
    separators such as a dash) is left in place.

    Args:
        text: The raw snippet text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    text = re.sub(r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{1,2}, \d{4}', '', text)
    text = text.replace("More items...", "")
    return text.strip()
# NOTE(review): this script body repeats, statement for statement, the script
# that appears earlier in this file -- presumably a paste artifact. As written,
# running the file would execute the bot twice; confirm and drop one copy.
#
# Script body: for every line of yeni2.txt, run it as a Google search in a
# headless Chrome session, click up to four div[jsname='yEVEwb'] elements
# (presumably the expandable question entries on the results page -- confirm),
# then save each header/answer pair found in the page source as a text file
# inside txtFolder. Each processed line is removed from yeni2.txt.
text = "Arı kuşu"  # NOTE(review): not referenced anywhere in the visible script
txtFolder = "başlık_txtler"
print("----- Bot Başarıyla Başlatıldı -----")
options = webdriver.ChromeOptions()
options.add_argument("headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    driver.maximize_window()
    driver.implicitly_wait(30)
    wait = WebDriverWait(driver, 30)  # NOTE(review): created but never used below
    driver.get("https://www.google.com/")
    os.makedirs(txtFolder, exist_ok=True)

    # Detect the encoding of the query file before reading it as text.
    with open("yeni2.txt", "rb") as f:
        raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Dosya kodlaması: {encoding}")
    with open("yeni2.txt", "r", encoding=encoding) as f:
        lines = f.readlines()

    for line in lines:
        query = line.strip()
        if not query:
            # Blank line: there is nothing to search for.
            continue

        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(query)
        search_box.send_keys(Keys.ENTER)
        time.sleep(22)

        for i in range(4):
            try:
                # The parentheses matter: "(//div[...])[n]" is the n-th match in
                # the whole document, while "//div[...][n]" is the n-th match
                # among the children of each parent.
                question = driver.find_element(By.XPATH, f"(//div[@jsname='yEVEwb'])[{i + 1}]")
                question.click()
                time.sleep(2)
            except WebDriverException:
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")

        soup = BeautifulSoup(driver.page_source, "html.parser")
        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            try:
                header = link.find_all_next("div", {"jsaction": "AWEk5c"})[0].find_all_next("div")[0].find_all_next("span")[0].text
                content = link.find_all_next("div", {"jsname": "oQYOj"})[0].find_all_next("div")
            except IndexError:
                # The expected markup is missing for this entry; skip it
                # instead of aborting the whole run.
                continue

            # Only keep answers whose first div holds at least eight words.
            if not content or len(content[0].text.split()) < 8:
                continue

            print(header)
            contentW = deleteTarih(content[0].text)
            if contentW == "":
                # The first div held nothing but a date/marker; fall back to
                # its next sibling div, if there is one.
                siblings = content[0].find_next_siblings("div")
                if not siblings:
                    continue
                contentW = deleteTarih(siblings[0].text)

            file_name = slugify(header).replace('-', ' ')
            with open(os.path.join(txtFolder, file_name + ".txt"), 'w', encoding='utf-8') as f:
                f.write(header)
                f.write('\n')
                f.write(contentW)

        driver.back()
        time.sleep(22)

        # Rewrite yeni2.txt without the query that was just processed.
        with open('yeni2.txt', 'r+', encoding=encoding) as f:
            remaining = [row for row in f.readlines() if row.strip() != query]
            f.seek(0)
            f.writelines(remaining)
            f.truncate()

    print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")
finally:
    # Always shut the browser down, even when the run fails part-way.
    driver.quit()