File size: 4,752 Bytes
13e4835
 
 
 
 
 
 
 
 
 
 
 
986cd95
b438494
13e4835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b438494
 
13e4835
b438494
 
13e4835
 
b438494
 
13e4835
b438494
 
13e4835
b438494
986cd95
 
 
 
 
13e4835
 
 
 
986cd95
13e4835
 
986cd95
b438494
986cd95
13e4835
 
 
 
 
 
 
 
 
 
986cd95
13e4835
b438494
 
 
13e4835
38334af
b438494
13e4835
986cd95
 
13e4835
38334af
986cd95
13e4835
 
b438494
 
2f70cff
 
13e4835
986cd95
 
 
 
 
 
 
 
 
 
 
 
2f70cff
13e4835
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from io import StringIO
from urllib.parse import urlparse
import streamlit as st
import requests
from dotenv import load_dotenv
import os
import time
from helper.telemetry import collect_telemetry
from helper.upload_File import uploadFile
from helper.button_behaviour import hide_button, unhide_button
from helper.initialize_analyze_session import initialize_analyze_session
import pandas as pd
import asyncio
import json

class SeoOffPageAnalyst:
    """Streamlit widget that summarises SEMRush backlink CSV exports.

    Constructing the object renders a CSV file uploader and, when files are
    present and ``st.session_state['analyze'] == 'clicked'``, computes the
    total backlink count and the number of distinct referring domains, then
    forwards that summary to ``collect_telemetry``.
    """

    def __init__(self, model_url):
        """Store the model endpoint, prepare session state and render the UI.

        Args:
            model_url: URL that ``request_model`` posts to.
        """
        self.uploaded_files = []
        self.file_dict = {}
        self.model_url = model_url
        self.initialize()
        self.row1()

    def initialize(self):
        """Load ``.env`` variables and seed the session flag this page owns."""
        load_dotenv()

        if 'off_page_file_uploaded' not in st.session_state:
            st.session_state['off_page_file_uploaded'] = ''

    def request_model(self, payload_txt, headers):
        """POST ``payload_txt`` as JSON to ``self.model_url`` and decode the reply.

        The response body is expected to carry a JSON-encoded string at
        ``outputs[0].outputs[0].results.text.data.text``; that string is
        parsed with ``json.loads`` and the parsed value is returned.

        Args:
            payload_txt: JSON-serialisable request body.
            headers: HTTP headers passed through to ``requests.post``.

        Returns:
            The value decoded from the nested JSON string.

        Raises:
            requests.HTTPError: if the server answers with an error status
                (via ``raise_for_status``).
            KeyError, IndexError: if the response lacks the expected nesting.
            json.JSONDecodeError: if the nested text is not valid JSON.
        """
        response = requests.post(self.model_url, json=payload_txt, headers=headers)
        response.raise_for_status()
        body = response.json()
        raw_text = body["outputs"][0]["outputs"][0]["results"]["text"]["data"]["text"]
        return json.loads(raw_text)

    @staticmethod
    def _extract_domains(urls):
        """Return the set of network locations (``netloc``) found in ``urls``.

        Non-string entries (e.g. NaN produced by empty CSV cells) are
        skipped, because ``urlparse`` cannot handle them.

        Args:
            urls: iterable of URL values, such as a DataFrame column.

        Returns:
            A ``set`` of ``netloc`` strings; a URL with no scheme contributes
            the empty string, exactly as ``urlparse(url).netloc`` reports it.
        """
        return {urlparse(url).netloc for url in urls if isinstance(url, str)}

    def process(self):
        """Summarise every uploaded backlinks CSV and record the result.

        Runs only when files were uploaded through this widget and the
        session's ``'analyze'`` flag equals ``'clicked'``. Each entry of
        ``st.session_state['uploaded_files']`` whose ``'type'`` is ``'csv'``
        is examined: its rows are added to the backlink total and the
        domains of its ``'Source url'`` column are merged into one set, so a
        domain appearing in several files is counted once. Entries that
        raise ``KeyError`` (missing ``'type'``/``'content'`` keys or no
        ``'Source url'`` column) trigger an "Incorrect CSV format" notice and
        are skipped.

        Afterwards the summary text (empty when no valid CSV was found) is
        sent to ``collect_telemetry``, ``off_page_file_uploaded`` is set to
        ``'uploaded'`` and ``analyzing`` is reset to ``False``.
        """
        session = st.session_state['analyze']
        if not (self.uploaded_files and session == 'clicked'):
            return

        with st.spinner('SEO Off Page Analyst...', show_time=True):
            st.write('')

            total_backlinks = 0
            referring_domains = set()
            found_valid_csv = False

            for file_info in st.session_state['uploaded_files'].values():
                try:
                    if file_info['type'] != 'csv':
                        continue
                    # Re-parse from CSV text so we work on a detached copy of
                    # the frame stored in session state.
                    df = pd.read_csv(StringIO(file_info['content'].to_csv(index=True)))
                    source_urls = df['Source url']
                except KeyError:
                    st.info("Incorrect CSV format. Please upload a valid CSV file.")
                    continue

                total_backlinks += len(df)
                referring_domains |= self._extract_domains(source_urls)
                found_valid_csv = True

            combined_text = ""
            if found_valid_csv:
                combined_text = (
                    f"Total Backlinks Count: {total_backlinks}\n"
                    f"Referring Domain: {len(referring_domains)}"
                )
                st.info("Backlinks - SEMRush Uploaded Successfuly", icon="ℹ️")

            # NOTE: request_model() is not invoked here; the locally computed
            # summary is what gets reported.
            debug_info = {'data_field': 'Backlinks', 'result': combined_text}
            collect_telemetry(debug_info)

            st.session_state["off_page_file_uploaded"] = 'uploaded'
            st.session_state['analyzing'] = False

    def row1(self):
        """Render the CSV uploader, register uploads and run ``process``.

        Uploaded files are handed to the module-level ``upload`` helper via
        ``multiple_upload_file`` before ``process`` is called.
        """
        self.uploaded_files = st.file_uploader(
            'Backlinks - SEMRush',
            type='csv',
            accept_multiple_files=True,
            key="seo_off",
        )
        if self.uploaded_files:
            upload.multiple_upload_file(self.uploaded_files)

        st.session_state['analyzing'] = False

        self.process()
                                       

# Switch Streamlit to the wide page layout, but only when this module is
# executed directly as a script (not when it is imported).
if __name__ == "__main__":
    st.set_page_config(layout="wide")

# Module-level uploadFile helper; SeoOffPageAnalyst.row1 calls
# upload.multiple_upload_file() on it. Because it sits outside the guard
# above, it is created on import as well as on direct execution.
upload = uploadFile()