Spaces:

oyi77
/

1proxy

Paused

App Files Files Community

Paijo commited on Feb 2

Commit

24a5e4d

verified ·

1 Parent(s): 803e6de

update tests/unit/test_extractor.py

Browse files

Files changed (1) hide show

tests/unit/test_extractor.py +84 -0

tests/unit/test_extractor.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import base64
+import pytest
+from app.hunter.extractor import UniversalExtractor
+class TestUniversalExtractor:
+    def test_extract_simple_ip_port(self):
+        content = "Here is a proxy 1.1.1.1:80 and another 192.168.1.1:8080 end"
+        proxies = UniversalExtractor.extract_proxies(content)
+        assert len(proxies) == 2
+        assert proxies[0].ip == "1.1.1.1"
+        assert proxies[0].port == 80
+        assert proxies[1].ip == "192.168.1.1"
+        assert proxies[1].port == 8080
+    def test_extract_base64_content(self):
+        # "1.1.1.1:80" encoded
+        content = "MS4xLjEuMTo4MA=="
+        proxies = UniversalExtractor.extract_proxies(content)
+        assert len(proxies) == 1
+        assert proxies[0].ip == "1.1.1.1"
+        assert proxies[0].port == 80
+    def test_extract_messy_html(self):
+        content = """
+        <html>
+        <body>
+            <p>List of proxies:</p>
+            <div>10.0.0.1:3128</div>
+            <span>8.8.8.8:80</span>
+        </body>
+        </html>
+        """
+        proxies = UniversalExtractor.extract_proxies(content)
+        assert len(proxies) == 2
+        ips = {p.ip for p in proxies}
+        assert "10.0.0.1" in ips
+        assert "8.8.8.8" in ips
+    def test_extract_vmess_and_vless(self):
+        # Fake vmess/vless links (using patterns that match ProxyPatterns regex)
+        # VMess regex: vmess://[A-Za-z0-9+/=]+
+        # VLESS regex: vless://[a-zA-Z0-9-]+@[a-zA-Z0-9.-]+:[0-9]+[^\s]*
+        vmess = "vmess://ew0KICAidiI6ICIyIiwNCiAgInBzIjogInRlc3QiLA0KICAiYWRkIjogIjEuMi4zLjQiLA0KICAicG9ydCI6ICI0NDMiLA0KICAiaWQiOiAiYWJjZCIsDQogICJhaWQiOiAiMCIsDQogICJuZXQiOiAidGNwIiwNCiAgInR5cGUiOiAibm9uZSIsDQogICJob3N0IjogIiIsDQogICJwYXRoIjogIiIsDQogICJ0bHMiOiAiIg0KfQ=="
+        vless = "vless://uuid-test@example.com:443?type=tcp"
+        content = f"{vmess}\n{vless}"
+        # We need to handle the fact that parsers might fail if the base64 content in vmess is invalid JSON
+        # But here I used a valid vmess json base64
+        proxies = UniversalExtractor.extract_proxies(content)
+        # Should find at least the vless one, and vmess if parser works
+        assert len(proxies) >= 1
+        protocols = {p.protocol for p in proxies}
+        assert "vless" in protocols
+        # VMess parser logic is complex, might fail if my mock string isn't perfect, but let's see.
+    def test_deduplication(self):
+        content = "1.1.1.1:80\n1.1.1.1:80"
+        proxies = UniversalExtractor.extract_proxies(content)
+        assert len(proxies) == 1
+    def test_mixed_base64_and_text(self):
+        # Sometimes a file has some text header + base64 blob
+        # The extractor tries to decode the whole thing. If it fails, it treats as text.
+        # But if the file is PURE base64, it decodes.
+        # If it's mixed, SubscriptionDecoder might fail or return partial?
+        # Our implementation: _try_decode catches exception and returns original text.
+        # Then _parse_text runs on original text.
+        # So if I have "Header\n" + base64, decoding fails, so it parses as text.
+        # Regex will find nothing in the base64 part if it's encoded.
+        # This is a limitation of simple UniversalExtractor unless we try to find base64 blobs *inside* text.
+        # For Phase 1, we assume full content is either text OR base64.
+        # Let's test just text
+        content = "Proxy: 1.1.1.1:80"
+        proxies = UniversalExtractor.extract_proxies(content)
+        assert len(proxies) == 1