| import gradio as gr |
| import requests |
| import json |
|
|
class SynthIDApp:
    """Backend for the watermarking demo, built on the Hugging Face Inference API.

    The instance keeps the model endpoint, the watermark keys and, after a
    successful ``login``, the authorization headers used for every request.
    All public methods return plain strings (or tuples of strings) suitable
    for display in UI widgets; failures are reported as status text instead
    of being raised.
    """

    # Seconds to wait for the Inference API before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Configure the endpoint and keys; no network access happens here."""
        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        # Set by login(); None means "not authenticated yet".
        self.headers = None
        # Keys sent in the `watermarking_config` of every watermark request.
        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

    def login(self, hf_token):
        """Store the bearer token and validate it with a one-token test request.

        Args:
            hf_token: Hugging Face access token as typed by the user.

        Returns:
            A status message. On any failure ``self.headers`` is reset to
            ``None`` so later calls report that login is required.
        """
        # Pasted tokens often carry stray whitespace/newlines, which would
        # produce an invalid Authorization header.
        token = str(hf_token or "").strip()
        if not token:
            self.headers = None
            return "Error initializing API: no Hugging Face token provided"

        try:
            self.headers = {"Authorization": f"Bearer {token}"}
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return "API connection initialized successfully!"
        except Exception as e:
            # UI boundary: surface every failure as status text rather than
            # letting it propagate into the event handler.
            self.headers = None
            return f"Error initializing API: {str(e)}"

    @staticmethod
    def _extract_watermarked_text(generated_text, text):
        """Return the model's answer with the echoed prompt and edge dots removed.

        Prefers whatever follows the last ``[/INST]`` marker; without a marker
        it falls back to the text following the first occurrence of ``text``,
        and finally to the whole generation.
        """
        _, marker, answer = generated_text.rpartition("[/INST]")
        if marker:
            candidate = answer
        else:
            position = generated_text.find(text)
            if position != -1:
                candidate = generated_text[position + len(text):]
            else:
                candidate = generated_text
        # Trim whitespace first, then surrounding spaces/periods; the caller
        # re-adds a final period when the input had one.
        return candidate.strip().strip(" .")

    def apply_watermark(self, text, ngram_len):
        """Apply SynthID watermark to input text using the inference API.

        Args:
            text: Text to send to the model.
            ngram_len: N-gram length for the watermarking config; converted
                with ``int()`` before being sent.

        Returns:
            ``(output_text, status)``. On any error ``output_text`` is the
            unchanged input and ``status`` describes the problem.
        """
        if not self.headers:
            return text, "Error: API not initialized. Please login first."
        if not text or not text.strip():
            # Nothing to watermark; avoid spending an API call on it.
            return text, "Error: No input text provided."

        try:
            ngram = int(ngram_len)
            prompt = f"<s>[INST] Return the exact same text, with watermark applied: {text} [/INST]"

            # NOTE(review): the transformers SynthID documentation applies the
            # watermark while sampling (do_sample=True). Greedy decoding is
            # requested here, and it is unverified whether the hosted
            # Inference API honours `watermarking_config` at all - confirm.
            params = {
                "inputs": prompt,
                "parameters": {
                    "return_full_text": True,
                    "do_sample": False,
                    "temperature": 0.01,
                    "watermarking_config": {
                        "keys": self.WATERMARK_KEYS,
                        "ngram_len": ngram,
                    },
                },
            }

            # Exactly one request, always bounded by a timeout.
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            result = response.json()
            if not (isinstance(result, list) and len(result) > 0):
                return text, f"Error: Unexpected API response format: {str(result)}"

            first = result[0]
            if 'error' in first:
                return text, f"API Error: {first['error']}"

            generated_text = first.get('generated_text', '').strip()
            watermarked_text = self._extract_watermarked_text(generated_text, text)
            if not watermarked_text:
                return text, "Error: No watermarked text generated"

            # Restore the sentence-final period removed during extraction.
            if text.strip().endswith('.'):
                watermarked_text += '.'

            return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram})"
        except Exception as e:
            # UI boundary: network, HTTP, JSON and conversion errors are all
            # reported as status text next to the untouched input.
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Analyze text characteristics.

        Args:
            text: Text to inspect.

        Returns:
            A multi-line report with the character count, the number of
            whitespace-separated words and the mean word length, or an
            ``"Error analyzing text: ..."`` message if the input cannot be
            processed.
        """
        try:
            words = text.split()
            total_words = len(words)
            # Guard against division by zero for empty/whitespace-only input.
            avg_word_length = sum(map(len, words)) / total_words if total_words > 0 else 0
            char_count = len(text)

            return (
                "Text Analysis:\n"
                f"- Total characters: {char_count}\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector "
                "is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {str(e)}"
|
|
| |
# Single backend object whose bound methods serve as the UI event handlers.
# NOTE(review): being module-level, this instance (and the token kept in its
# headers after login) is shared by every session served by this process -
# consider per-session state if the app is exposed to more than one user.
app_instance = SynthIDApp()


# Declarative Gradio layout: a login row, two tabs (watermarking and basic
# text statistics) and a block of usage notes.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Mistral-7B-Instruct-v0.2 with Hugging Face Inference API")

    # --- Authentication -------------------------------------------------
    with gr.Row():
        hf_token = gr.Textbox(
            label="Enter Hugging Face Token",
            type="password",
            placeholder="hf_...",
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

    # --- Watermarking tab -----------------------------------------------
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            with gr.Column(scale=3):
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=5,
                    placeholder="Enter text to watermark...",
                    value="Test Sentence: WordLift is a cutting-edge platform designed to enhance your digital content by leveraging the power of semantic technology. It transforms your website into a structured repository of knowledge, making your content more discoverable, engaging, and aligned with modern search engine algorithms. By utilizing AI-driven entity extraction and knowledge graph generation, WordLift helps you bridge the gap between your content and search intent, ensuring optimal visibility and performance.",
                )
                output_text = gr.Textbox(label="Watermarked Text", lines=5)
            with gr.Column(scale=1):
                ngram_len = gr.Slider(
                    label="N-gram Length",
                    minimum=2,
                    maximum=5,
                    step=1,
                    value=2,
                    info="Controls watermark detectability (2-5)",
                )
                status = gr.Textbox(label="Status")

        # The "Default" bullet must match the slider's initial value above.
        gr.Markdown("""
        ### N-gram Length Parameter:
        - Higher values (4-5): More detectable watermark, but more brittle to changes
        - Lower values (2-3): More robust to changes, but harder to detect
        - Default (2): Most robust to changes""")

        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            app_instance.apply_watermark,
            inputs=[input_text, ngram_len],
            outputs=[output_text, status],
        )

    # --- Analysis tab ---------------------------------------------------
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                label="Text to Analyze",
                lines=5,
                placeholder="Enter text to analyze...",
            )
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

    # --- Usage notes ----------------------------------------------------
    gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
    2. Once connected, you can use the tabs to apply watermarks or analyze text
    3. Adjust the N-gram Length slider to control watermark characteristics

    ### Notes:
    - The watermarking process attempts to maintain the original meaning while adding the watermark
    - If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
    - This is an experimental feature using the Inference API
    - No model download required - everything runs in the cloud
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    - Your token is kept in memory only while the app is running and is only used for API access
    """)


# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()