Move test for get_all_text to test_parser_advanced.py
Browse files
tests/parser/test_general.py
CHANGED
|
@@ -327,32 +327,6 @@ def test_getting_all_text(page):
|
|
| 327 |
assert page.get_all_text() != ""
|
| 328 |
|
| 329 |
|
| 330 |
-
def test_getting_all_text_from_nested_content():
|
| 331 |
-
"""Test getting all text preserves interleaved text nodes"""
|
| 332 |
-
html = """
|
| 333 |
-
<html>
|
| 334 |
-
<body>
|
| 335 |
-
<main>
|
| 336 |
-
string1
|
| 337 |
-
<b>string2</b>
|
| 338 |
-
string3
|
| 339 |
-
<div>
|
| 340 |
-
<span>string4</span>
|
| 341 |
-
</div>
|
| 342 |
-
string5
|
| 343 |
-
<script>ignored</script>
|
| 344 |
-
<style>ignored</style>
|
| 345 |
-
</main>
|
| 346 |
-
</body>
|
| 347 |
-
</html>
|
| 348 |
-
"""
|
| 349 |
-
|
| 350 |
-
page = Selector(html, adaptive=False)
|
| 351 |
-
node = page.css("main")[0]
|
| 352 |
-
|
| 353 |
-
assert node.get_all_text("\n", strip=True) == "string1\nstring2\nstring3\nstring4\nstring5"
|
| 354 |
-
|
| 355 |
-
|
| 356 |
def test_regex_on_text(page):
|
| 357 |
"""Test regex operations on text"""
|
| 358 |
element = page.css('[data-id="1"] .price')[0]
|
|
|
|
| 327 |
assert page.get_all_text() != ""
|
| 328 |
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
def test_regex_on_text(page):
|
| 331 |
"""Test regex operations on text"""
|
| 332 |
element = page.css('[data-id="1"] .price')[0]
|
tests/parser/test_parser_advanced.py
CHANGED
|
@@ -183,6 +183,33 @@ class TestAdvancedSelectors:
|
|
| 183 |
text = page.get_all_text(valid_values=False)
|
| 184 |
assert text != ""
|
| 185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
class TestTextHandlerAdvanced:
|
| 188 |
"""Test advanced TextHandler functionality"""
|
|
|
|
| 183 |
text = page.get_all_text(valid_values=False)
|
| 184 |
assert text != ""
|
| 185 |
|
| 186 |
+
def test_get_all_text_preserves_interleaved_text_nodes(self):
|
| 187 |
+
"""Test get_all_text preserves interleaved text nodes"""
|
| 188 |
+
html = """
|
| 189 |
+
<html>
|
| 190 |
+
<body>
|
| 191 |
+
<main>
|
| 192 |
+
string1
|
| 193 |
+
<b>string2</b>
|
| 194 |
+
string3
|
| 195 |
+
<div>
|
| 196 |
+
<span>string4</span>
|
| 197 |
+
</div>
|
| 198 |
+
string5
|
| 199 |
+
<script>ignored</script>
|
| 200 |
+
string6
|
| 201 |
+
<style>ignored</style>
|
| 202 |
+
string7
|
| 203 |
+
</main>
|
| 204 |
+
</body>
|
| 205 |
+
</html>
|
| 206 |
+
"""
|
| 207 |
+
|
| 208 |
+
page = Selector(html, adaptive=False)
|
| 209 |
+
node = page.css("main")[0]
|
| 210 |
+
|
| 211 |
+
assert node.get_all_text("\n", strip=True) == "string1\nstring2\nstring3\nstring4\nstring5\nstring6\nstring7"
|
| 212 |
+
|
| 213 |
|
| 214 |
class TestTextHandlerAdvanced:
|
| 215 |
"""Test advanced TextHandler functionality"""
|