Spaces:

WordLift
/

synthID

Running

App Files Files Community

synthID / app.py

cyberandy

Update app.py

120c013 verified over 1 year ago

raw

history blame contribute delete

9.08 kB

	import gradio as gr
	import requests
	import json

	class SynthIDApp:
	    """Small client around a Hugging Face Inference API text-generation endpoint.

	    The instance keeps the endpoint URL, the authorization headers created by
	    ``login`` and the list of watermark keys that ``apply_watermark`` sends in
	    its ``watermarking_config``. Every public method reports problems through
	    its return value instead of raising, so it can be wired directly to UI
	    callbacks.
	    """

	    # Seconds to wait for any single HTTP call before giving up.
	    REQUEST_TIMEOUT = 30

	    def __init__(self):
	        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
	        # None means "not logged in"; set to a headers dict by login().
	        self.headers = None
	        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

	    def login(self, hf_token):
	        """Validate *hf_token* against the endpoint and remember it on success.

	        Args:
	            hf_token: Hugging Face access token placed in the Bearer header.

	        Returns:
	            A human-readable status string. On any failure ``self.headers`` is
	            reset to ``None`` so later calls report "not initialized".
	        """
	        candidate_headers = {"Authorization": f"Bearer {hf_token}"}
	        try:
	            # Cheapest possible generation, used only to prove the token works.
	            response = requests.post(
	                self.api_url,
	                headers=candidate_headers,
	                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
	                timeout=self.REQUEST_TIMEOUT,  # never hang the UI on a dead endpoint
	            )
	            response.raise_for_status()
	        except Exception as e:
	            self.headers = None
	            return f"Error initializing API: {e}"

	        self.headers = candidate_headers
	        return "API connection initialized successfully!"

	    def apply_watermark(self, text, ngram_len):
	        """Ask the endpoint to reproduce *text* with a watermarking config attached.

	        Args:
	            text: The text to send to the model.
	            ngram_len: N-gram length forwarded in ``watermarking_config``;
	                converted with ``int()``.

	        Returns:
	            ``(output_text, status_message)``. On any error the original
	            *text* is returned unchanged together with an error message.
	        """
	        if not self.headers:
	            return text, "Error: API not initialized. Please login first."
	        if not text or not text.strip():
	            # Nothing to watermark; avoid spending an API call on an empty prompt.
	            return text, "Error: No input text provided."

	        try:
	            prompt = f"<s>[INST] Return the exact same text, with watermark applied: {text} [/INST]"
	            # NOTE(review): sampling is disabled while a watermarking_config is
	            # sent; confirm the endpoint honours this combination.
	            params = {
	                "inputs": prompt,
	                "parameters": {
	                    "return_full_text": True,
	                    "do_sample": False,  # Deterministic generation
	                    "temperature": 0.01,  # Almost deterministic
	                    "watermarking_config": {
	                        "keys": self.WATERMARK_KEYS,
	                        "ngram_len": int(ngram_len),
	                    },
	                },
	            }

	            # Exactly one request: issuing it twice only doubled latency and quota use.
	            response = requests.post(
	                self.api_url,
	                headers=self.headers,
	                json=params,
	                timeout=self.REQUEST_TIMEOUT,
	            )
	            response.raise_for_status()

	            return self._parse_generation(text, response.json(), ngram_len)
	        except Exception as e:
	            return text, f"Error applying watermark: {e}"

	    @staticmethod
	    def _parse_generation(text, result, ngram_len):
	        """Turn the decoded JSON *result* into ``(output_text, status_message)``."""
	        if not (isinstance(result, list) and result):
	            return text, f"Error: Unexpected API response format: {result}"

	        first = result[0]
	        if 'error' in first:
	            return text, f"API Error: {first['error']}"

	        generated_text = first.get('generated_text', '').strip()

	        # return_full_text echoes the prompt, so keep only what follows the
	        # last instruction terminator.
	        _, marker, tail = generated_text.rpartition("[/INST]")
	        if marker:
	            watermarked_text = tail.strip()
	        else:
	            # No terminator: take what follows the original text, or, failing
	            # that, the whole generation.
	            idx = generated_text.find(text)
	            if idx != -1:
	                watermarked_text = generated_text[idx + len(text):].strip()
	            else:
	                watermarked_text = generated_text

	        watermarked_text = watermarked_text.strip(' .')
	        if not watermarked_text:
	            return text, "Error: No watermarked text generated"

	        # Add back the period if the original had one.
	        if text.strip().endswith('.'):
	            watermarked_text += '.'

	        return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"

	    def analyze_text(self, text):
	        """Return a short report with character count, word count and mean word length.

	        Words are whitespace-separated tokens. Any exception (for example a
	        non-string *text*) is reported as an error string rather than raised.
	        """
	        try:
	            words = text.split()
	            total_words = len(words)
	            avg_word_length = sum(map(len, words)) / total_words if total_words else 0
	            char_count = len(text)

	            return "\n".join([
	                "Text Analysis:",
	                f"- Total characters: {char_count}",
	                f"- Total words: {total_words}",
	                f"- Average word length: {avg_word_length:.2f}",
	                "",
	                "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.",
	            ])
	        except Exception as e:
	            return f"Error analyzing text: {e}"

	# Create Gradio interface
	# NOTE(review): one module-level SynthIDApp serves every event handler below,
	# so the headers set by login() are kept on this shared object for the life
	# of the process. Confirm this matches the "token is never stored" note shown
	# to users.
	app_instance = SynthIDApp()

	# NOTE(review): container nesting was reconstructed from a copy of this file
	# that had lost its indentation; confirm the layout against the deployed app.
	with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
	    gr.Markdown("# SynthID Text Watermarking Tool")
	    gr.Markdown("Using Mistral-7B-Instruct-v0.2 with Hugging Face Inference API")

	    # Login section: token box and status side by side, button underneath.
	    with gr.Row():
	        hf_token = gr.Textbox(
	            label="Enter Hugging Face Token",
	            type="password",
	            placeholder="hf_..."
	        )
	        login_status = gr.Textbox(label="Login Status")
	    login_btn = gr.Button("Login")
	    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

	    with gr.Tab("Apply Watermark"):
	        with gr.Row():
	            with gr.Column(scale=3):
	                input_text = gr.Textbox(
	                    label="Input Text",
	                    lines=5,
	                    placeholder="Enter text to watermark...",
	                    value="Test Sentence: WordLift is a cutting-edge platform designed to enhance your digital content by leveraging the power of semantic technology. It transforms your website into a structured repository of knowledge, making your content more discoverable, engaging, and aligned with modern search engine algorithms. By utilizing AI-driven entity extraction and knowledge graph generation, WordLift helps you bridge the gap between your content and search intent, ensuring optimal visibility and performance."
	                )
	                output_text = gr.Textbox(label="Watermarked Text", lines=5)
	            with gr.Column(scale=1):
	                ngram_len = gr.Slider(
	                    label="N-gram Length",
	                    minimum=2,
	                    maximum=5,
	                    step=1,
	                    value=2,
	                    info="Controls watermark detectability (2-5)"
	                )
	                status = gr.Textbox(label="Status")

	        # The "Default" line must agree with the slider's value= above.
	        gr.Markdown("""
	### N-gram Length Parameter:
	- Higher values (4-5): More detectable watermark, but more brittle to changes
	- Lower values (2-3): More robust to changes, but harder to detect
	- Default (2): Most robust to changes""")

	        apply_btn = gr.Button("Apply Watermark")
	        apply_btn.click(
	            app_instance.apply_watermark,
	            inputs=[input_text, ngram_len],
	            outputs=[output_text, status]
	        )

	    with gr.Tab("Analyze Text"):
	        with gr.Row():
	            analyze_input = gr.Textbox(
	                label="Text to Analyze",
	                lines=5,
	                placeholder="Enter text to analyze..."
	            )
	            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
	        analyze_btn = gr.Button("Analyze Text")
	        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

	    gr.Markdown("""
	### Instructions:
	1. Enter your Hugging Face token and click Login
	2. Once connected, you can use the tabs to apply watermarks or analyze text
	3. Adjust the N-gram Length slider to control watermark characteristics

	### Notes:
	- The watermarking process attempts to maintain the original meaning while adding the watermark
	- If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
	- This is an experimental feature using the Inference API
	- No model download required - everything runs in the cloud
	- The watermark is designed to be imperceptible to humans
	- This demo only implements watermark application
	- The official detector will be available in future releases
	- For production use, use your own secure watermark keys
	- Your token is never stored and is only used for API access
	""")

	# Launch the app only when run as a script, not when imported.
	if __name__ == "__main__":
	    app.launch()