File size: 1,459 Bytes
d38101e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


import pandas as pd
from googlesearch import search
import time
import random

# Interactive helper: re-runs web searches for rows of the progress CSV whose
# URL is missing or does not point at amazon/google, lets the operator pick the
# right hit, and writes the merged result to a new CSV.

INPUT_CSV = "search_progress.csv"
OUTPUT_CSV = "search_progress1.csv"


def needs_lookup(frame):
    """Return a boolean Series marking rows that still need a URL.

    A row needs a lookup when any of its cells is null, or when its ``url``
    contains neither "amazon" nor "google".

    Args:
        frame: DataFrame with at least a ``url`` column holding strings/NaN.

    Returns:
        Boolean Series aligned with ``frame.index``.
    """
    has_gap = frame.isnull().any(axis=1)
    # na=False makes missing URLs count as "not trusted" instead of NaN.
    trusted = frame["url"].str.contains("amazon|google", na=False)
    return has_gap | ~trusted


def select_url(candidates, raw_choice):
    """Translate the operator's 1-based answer into one of ``candidates``.

    Args:
        candidates: List of candidate URLs, in the order they were displayed.
        raw_choice: Text typed by the operator.

    Returns:
        The chosen URL, or ``None`` when the answer is not an integer in
        ``1..len(candidates)``. Zero and negative numbers are rejected on
        purpose: with plain list indexing they would wrap around and silently
        pick a URL from the end of the list.
    """
    try:
        number = int(raw_choice)
    except (TypeError, ValueError):
        return None
    if 1 <= number <= len(candidates):
        return candidates[number - 1]
    return None


def collect_candidates(title):
    """Search for ``title`` and for its "google"->"amazon" variant.

    Returns:
        Results of the first query followed by results of the second query.
    """
    primary = search(title, num_results=3, lang="en")
    alternate = search(title.replace("google", "amazon"), num_results=3, lang="en")
    return list(primary) + list(alternate)


def main():
    """Run the interactive URL-fixing session and write ``OUTPUT_CSV``."""
    df = pd.read_csv(INPUT_CSV)
    df1 = df.drop("query_index", axis=1)

    print("Initial DataFrame:")
    print(df1.head())

    df1.columns = ["title", "url"]
    # When every URL is missing, read_csv produces a float column; cast to
    # object so the .str accessor and the string assignments below work.
    df1["url"] = df1["url"].astype(object)

    # Explicit copy: we assign into this frame later, and assigning into a
    # filtered slice of df1 triggers pandas' SettingWithCopy warning.
    pending = df1[needs_lookup(df1)].copy()
    titles = pending["title"].tolist()
    chosen_urls = []

    for position, title in enumerate(titles, start=1):
        print()
        print(f"Processing title {position}/{len(titles)}: {title}")
        chosen = None
        try:
            candidates = collect_candidates(title)
            print("\n")
            print(f"Searching for: {title}")
            for number, candidate in enumerate(candidates, start=1):
                print(number, candidate)
            if candidates:
                raw = input(
                    f"Enter the index of the correct URL (1-{len(candidates)}): "
                )
                chosen = select_url(candidates, raw)
                if chosen is None:
                    print(f"Invalid selection {raw!r}; leaving URL unchanged for {title}")
            else:
                print(f"No results found for {title}")
        except Exception as e:
            # Best-effort boundary: a failed search (network error, rate limit,
            # non-string title) must not abort the whole session.
            print(f"Error occurred while searching for {title}: {e}")
        chosen_urls.append(chosen)
        time.sleep(random.randint(1, 5))  # Sleep to avoid hitting the search API too quickly

    pending["url"] = chosen_urls
    print("Updated DataFrame with URLs:")
    print(pending.head())

    # DataFrame.update skips NA values in `pending`, so rows the operator
    # skipped (None) keep whatever URL they had before.
    df1.update(pending)
    df1.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()