saliacoel committed on
Commit
1233384
·
verified ·
1 Parent(s): ca580d9

Upload 2 files

Browse files
Files changed (2) hide show
  1. salia_get_bam.py +66 -0
  2. salia_zip_to_chars_bam.py +411 -0
salia_get_bam.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.error
2
+ import urllib.request
3
+
4
+ BAM_URL = "https://huggingface.co/saliacoel/chars/raw/main/BAM.txt"
5
+
6
+
7
class GetBAM:
    """ComfyUI node that returns one numbered line from the remote BAM.txt."""

    CATEGORY = "utils/text"
    FUNCTION = "get_line"
    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("line",)

    @classmethod
    def INPUT_TYPES(cls):
        # Single required widget: the 1-based line number to fetch.
        line_picker = (
            "INT",
            {
                "default": 1,
                "min": 1,
                "max": 99999,
                "step": 1,
            },
        )
        return {"required": {"ID": line_picker}}

    @classmethod
    def IS_CHANGED(cls, ID):
        # NaN never compares equal to itself, so ComfyUI re-executes this node
        # every run; the remote file may change without any widget changing.
        return float("nan")

    def get_line(self, ID):
        """Fetch BAM.txt and return line *ID* (1-based), or an ERROR string."""
        req = urllib.request.Request(
            BAM_URL,
            headers={"User-Agent": "ComfyUI-get_BAM/1.0"},
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as resp:
                body = resp.read().decode("utf-8-sig", errors="replace")
        except urllib.error.HTTPError as e:
            return (f"ERROR: HTTP {e.code} while reading BAM.txt",)
        except urllib.error.URLError as e:
            return (f"ERROR: Could not reach BAM.txt ({getattr(e, 'reason', e)})",)
        except Exception as e:
            return (f"ERROR: {e}",)

        lines = body.splitlines()
        if not 1 <= ID <= len(lines):
            return (f"ERROR: line {ID} is out of range (1-{len(lines)})",)
        return (lines[ID - 1],)
58
+
59
+
60
# ComfyUI registration: internal node id -> implementing class, and
# internal node id -> label shown in the node picker.
NODE_CLASS_MAPPINGS = {
    "get_BAM": GetBAM,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "get_BAM": "get_BAM",
}
salia_zip_to_chars_bam.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import re
4
+ import zipfile
5
+ from typing import Dict, List, Tuple
6
+ from urllib.parse import quote, urlparse
7
+ from urllib.request import Request, urlopen
8
+
9
# Destination Hugging Face repo that receives the imported character files.
TARGET_REPO_ID = "saliacoel/chars"
TARGET_REPO_TYPE = "model"
TARGET_BRANCH = "main"
# Path of the aggregated character/tag list inside the target repo.
TARGET_BAM_PATH = "BAM.txt"
# Category shown in the ComfyUI node menu.
CATEGORY = "Salia/HuggingFace"

# Characters never allowed in an uploaded path component
# (Windows-reserved punctuation, backslash, and ASCII control chars).
_INVALID_PATH_CHARS = re.compile(r'[<>:"\\|?*\x00-\x1F]')
# Matches a numbered BAM line like "12. Name, tag" -> (indent, number, rest).
_NUMBERED_BAM_LINE_RE = re.compile(r"^(\s*)(\d+)\.\s+(.*)$")
17
+
18
+
19
+ def _require_non_empty_string(value, name: str) -> str:
20
+ if value is None:
21
+ raise ValueError(f"{name} is required.")
22
+ value = str(value).strip()
23
+ if not value:
24
+ raise ValueError(f"{name} must not be empty.")
25
+ return value
26
+
27
+
28
+ def _optional_string(value) -> str:
29
+ if value is None:
30
+ return ""
31
+ return str(value).strip()
32
+
33
+
34
def _resolve_hf_token(hf_token: str) -> str:
    """Resolve a usable HF token: explicit argument first, then env vars.

    Raises ValueError when neither the argument nor any of the conventional
    environment variables yields a non-empty token.
    """
    explicit = _optional_string(hf_token)
    if explicit:
        return explicit

    # Fall back to the conventional environment variable names, in order.
    for env_name in ("HF_TOKEN", "HUGGINGFACE_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        candidate = os.environ.get(env_name, "").strip()
        if candidate:
            return candidate

    raise ValueError(
        "A Hugging Face write token is required. Provide hf_token in the node, or set HF_TOKEN / HUGGINGFACE_TOKEN / HUGGING_FACE_HUB_TOKEN in the environment."
    )
47
+
48
+
49
def _sanitize_path_component(component: str, label: str = "path component") -> str:
    """Reduce *component* to one safe path segment, or raise ValueError."""
    component = _require_non_empty_string(component, label)
    # Keep only the final segment so a component can never escape its folder.
    leaf = component.replace("\\", "/").split("/")[-1]
    leaf = _INVALID_PATH_CHARS.sub("_", leaf)
    leaf = leaf.strip().strip(".")
    if leaf in {"", ".", ".."}:
        raise ValueError(f"Invalid {label}: {leaf!r}")
    return leaf
58
+
59
+
60
+
61
def _sanitize_repo_path(path_in_repo: str) -> str:
    """Normalize a repo-relative path: forward slashes, sanitized segments.

    Empty and '.' segments are dropped; '..' is rejected outright. Raises
    ValueError when nothing survives sanitization.
    """
    path_in_repo = _require_non_empty_string(path_in_repo, "path_in_repo")
    kept = []
    for segment in path_in_repo.replace("\\", "/").split("/"):
        segment = segment.strip()
        if segment == "..":
            raise ValueError("path_in_repo must not contain '..'.")
        if segment and segment != ".":
            kept.append(_sanitize_path_component(segment))
    if not kept:
        raise ValueError("path_in_repo became empty after sanitization.")
    return "/".join(kept)
75
+
76
+
77
+
78
+ def _quote_repo_path(path_in_repo: str) -> str:
79
+ return "/".join(quote(part, safe="") for part in path_in_repo.split("/"))
80
+
81
+
82
+
83
def _repo_file_url(path_in_repo: str) -> str:
    """Build the public resolve/ URL for *path_in_repo* on the target repo."""
    clean = _sanitize_repo_path(path_in_repo)
    return (
        f"https://huggingface.co/{TARGET_REPO_ID}"
        f"/resolve/{TARGET_BRANCH}/{_quote_repo_path(clean)}"
    )
87
+
88
+
89
+
90
def _import_hf_bits():
    """Import the huggingface_hub pieces lazily.

    The import is deferred so ComfyUI can still load this node file when the
    package is missing; the failure only surfaces when the node actually runs.

    Returns:
        (HfApi, hf_hub_download, CommitOperationAdd)

    Raises:
        RuntimeError: if huggingface_hub cannot be imported, with install hint.
    """
    try:
        from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download
    except Exception as exc:
        raise RuntimeError(
            "Missing dependency 'huggingface_hub'. Install it in the ComfyUI Python environment with: pip install huggingface_hub"
        ) from exc
    return HfApi, hf_hub_download, CommitOperationAdd
98
+
99
+
100
+
101
def _download_url_bytes(url: str, hf_token: str) -> bytes:
    """Fetch *url* over http(s) and return the raw response body.

    A Bearer header is attached only for Hugging Face hosts when a token is
    available. Any transport failure is re-raised as RuntimeError carrying
    the URL in its message.
    """
    url = _require_non_empty_string(url, "zip_url")
    parsed = urlparse(url)
    if parsed.scheme not in {"http", "https"}:
        raise ValueError("zip_url must start with http:// or https://")

    headers = {"User-Agent": "ComfyUI-Salia-Import-TMP-Zip-To-Chars/1.0"}
    token = _optional_string(hf_token)
    is_hf_host = (parsed.netloc or "").lower().endswith(("huggingface.co", "hf.co"))
    if token and is_hf_host:
        # Authenticated download so private/gated Hub files also work.
        headers["Authorization"] = f"Bearer {token}"

    try:
        with urlopen(Request(url, headers=headers, method="GET"), timeout=90) as response:
            return response.read()
    except Exception as exc:
        raise RuntimeError(f"Failed to download zip_url: {url}") from exc
119
+
120
+
121
+
122
+ def _decode_text(raw: bytes) -> str:
123
+ for encoding in ("utf-8-sig", "utf-8", "latin-1"):
124
+ try:
125
+ return raw.decode(encoding)
126
+ except Exception:
127
+ continue
128
+ return raw.decode("utf-8", errors="replace")
129
+
130
+
131
+
132
+ def _normalize_newlines(text: str) -> str:
133
+ return str(text).replace("\r\n", "\n").replace("\r", "\n")
134
+
135
+
136
+
137
def _load_zip_character_files(zip_bytes: bytes) -> Dict[str, Dict[str, str]]:
    """Read a zip archive into {character folder: {relative path: text}}.

    Only ``.txt`` files nested at least one folder deep are kept; macOS
    ``__MACOSX`` metadata is ignored. Raises ValueError for an invalid
    archive or when no usable files are found.
    """
    collected: Dict[str, Dict[str, str]] = {}

    try:
        with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as archive:
            for entry in archive.infolist():
                if entry.is_dir():
                    continue

                normalized = entry.filename.replace("\\", "/").strip("/")
                if not normalized or normalized.startswith("__MACOSX/"):
                    continue

                segments = [seg for seg in normalized.split("/") if seg]
                # Need at least <character>/<file>; top-level files have no
                # character folder to attach to.
                if len(segments) < 2:
                    continue

                character = segments[0]
                rel_path = "/".join(segments[1:])
                if not rel_path.lower().endswith(".txt"):
                    continue

                content = _normalize_newlines(_decode_text(archive.read(entry)))
                collected.setdefault(character, {})[rel_path] = content
    except zipfile.BadZipFile as exc:
        raise ValueError("zip_url did not contain a valid .zip archive.") from exc

    if not collected:
        raise ValueError("No character folders with .txt files were found in the zip archive.")

    return collected
170
+
171
+
172
+
173
def _extract_bam_updates_and_repo_files(character_files: Dict[str, Dict[str, str]]):
    """Split zip contents into BAM tag updates and uploadable repo files.

    Any file whose basename is ``bam.txt`` (case-insensitive) contributes its
    stripped text as that character's tag; every other .txt becomes a
    (repo_path, utf-8 bytes) upload entry under the sanitized folder name.

    NOTE(review): BAM update keys use the raw folder name while repo paths
    use the sanitized one — presumably display name vs. path; confirm.
    """
    bam_updates: Dict[str, str] = {}
    repo_text_files: List[Tuple[str, bytes]] = []

    for character_name, files in character_files.items():
        safe_folder = _sanitize_path_component(character_name, "character folder name")

        for relative_path, text in files.items():
            if os.path.basename(relative_path).lower() == "bam.txt":
                tag = text.strip()
                if tag:
                    bam_updates[character_name] = tag
                continue

            repo_path = _sanitize_repo_path(f"{safe_folder}/{relative_path}")
            repo_text_files.append((repo_path, text.encode("utf-8")))

    return bam_updates, repo_text_files
192
+
193
+
194
+
195
def _split_bam_line(line: str):
    """Parse a numbered BAM line into its pieces, or return None.

    Example: "  3. Alice, tag" -> {"indent": "  ", "number": 3,
    "character_name": "Alice", "suffix": ", tag"}.
    """
    match = _NUMBERED_BAM_LINE_RE.match(line.rstrip("\n\r"))
    if match is None:
        return None

    indent, number, rest = match.groups()
    rest = rest.rstrip()
    # Everything from the first comma onward is the tag suffix.
    name, comma, tail = rest.partition(",")

    return {
        "indent": indent,
        "number": int(number),
        "character_name": name.strip(),
        "suffix": comma + tail,
    }
215
+
216
+
217
+
218
+ def _append_tag_to_existing_line(line: str, bam_tag: str):
219
+ stripped = line.rstrip()
220
+ if bam_tag in stripped:
221
+ return stripped, False, True
222
+
223
+ if stripped.endswith(","):
224
+ return stripped + " " + bam_tag, True, False
225
+
226
+ return stripped + ", " + bam_tag, True, False
227
+
228
+
229
+
230
def _apply_bam_updates(current_bam_text: str, bam_updates: Dict[str, str]):
    """Merge {character name: tag} updates into the BAM.txt text.

    For each update: if a numbered line with a matching character name
    (case-insensitive) exists, the tag is appended to it (duplicates are
    skipped); otherwise a new line is appended numbered one past the highest
    existing number.

    Returns:
        (new_text, updated_existing, appended_new_lines, duplicate_skips)
    """
    lines = _normalize_newlines(current_bam_text).split("\n") if current_bam_text else []

    # A trailing newline in the source produces one empty final element; drop it.
    if lines and lines[-1] == "":
        lines = lines[:-1]

    # Track the highest existing line number so new entries continue the sequence.
    highest_number = 0
    for line in lines:
        parsed = _split_bam_line(line)
        if parsed:
            highest_number = max(highest_number, parsed["number"])

    updated_existing = 0
    appended_new_lines = 0
    duplicate_skips = 0

    for character_name, bam_tag in bam_updates.items():
        found = False
        for index, line in enumerate(lines):
            parsed = _split_bam_line(line)
            if not parsed:
                continue
            if parsed["character_name"].casefold() != character_name.strip().casefold():
                continue

            # First matching line wins; later lines with the same name are ignored.
            new_line, changed, duplicate = _append_tag_to_existing_line(line, bam_tag)
            lines[index] = new_line
            if duplicate:
                duplicate_skips += 1
            elif changed:
                updated_existing += 1
            found = True
            break

        if not found:
            highest_number += 1
            lines.append(f"{highest_number}. {character_name.strip()}, {bam_tag}")
            appended_new_lines += 1

    if not lines:
        return "", updated_existing, appended_new_lines, duplicate_skips

    # Non-empty output always ends with a single trailing newline.
    return "\n".join(lines) + "\n", updated_existing, appended_new_lines, duplicate_skips
273
+
274
+
275
+
276
def _download_current_bam_text(hf_token: str) -> str:
    """Fetch the current BAM.txt from the target repo, newline-normalized.

    Returns "" when the file does not exist yet. Missing files are detected
    by sniffing the exception text for 404 / not-found markers —
    NOTE(review): this string matching is fragile; confirm against
    huggingface_hub's typed EntryNotFoundError instead.

    Raises:
        RuntimeError: for any other download failure.
    """
    _, hf_hub_download, _ = _import_hf_bits()

    try:
        local_path = hf_hub_download(
            repo_id=TARGET_REPO_ID,
            repo_type=TARGET_REPO_TYPE,
            filename=TARGET_BAM_PATH,
            revision=TARGET_BRANCH,
            token=hf_token,
        )
        with open(local_path, "rb") as handle:
            return _normalize_newlines(_decode_text(handle.read()))
    except Exception as exc:
        message = str(exc).lower()
        # A missing file is an expected state: start from an empty BAM list.
        if "404" in message or "not found" in message or "entry not found" in message:
            return ""
        raise RuntimeError(f"Failed to download current {TARGET_BAM_PATH} from {TARGET_REPO_ID}.") from exc
294
+
295
+
296
+
297
def _commit_files(named_bytes: List[Tuple[str, bytes]], hf_token: str, commit_message: str):
    """Push all (repo path, bytes) pairs to the target repo in one commit.

    Returns the commit URL reported by the API, falling back to the
    stringified commit info when no URL attribute is present.
    """
    HfApi, _, CommitOperationAdd = _import_hf_bits()

    operations = []
    for path_in_repo, file_bytes in named_bytes:
        operations.append(
            CommitOperationAdd(
                path_in_repo=_sanitize_repo_path(path_in_repo),
                path_or_fileobj=file_bytes,
            )
        )

    commit_info = HfApi(token=hf_token).create_commit(
        repo_id=TARGET_REPO_ID,
        repo_type=TARGET_REPO_TYPE,
        revision=TARGET_BRANCH,
        operations=operations,
        commit_message=commit_message,
        token=hf_token,
    )

    return getattr(commit_info, "commit_url", None) or str(commit_info)
319
+
320
+
321
class Salia_Import_TMP_zip_to_chars:
    """ComfyUI node: import a zip of character folders into the chars repo.

    Downloads a zip from a URL, merges each folder's bam.txt line into the
    repo-level BAM.txt, and uploads the remaining .txt files into matching
    character folders — all in a single Hugging Face commit.
    """

    # Re-exposes the module-level CATEGORY constant as the node category.
    CATEGORY = CATEGORY
    OUTPUT_NODE = True
    RETURN_TYPES = ("STRING", "INT", "INT", "STRING", "STRING")
    RETURN_NAMES = ("summary", "character_count", "uploaded_text_file_count", "bam_url", "commit_url")
    FUNCTION = "apply_zip"
    DESCRIPTION = (
        "Download a zip of character folders from a URL, merge each folder's bam.txt into saliacoel/chars:BAM.txt, "
        "and upload the other .txt files into matching character folders in saliacoel/chars."
    )
    SEARCH_ALIASES = [
        "reverse tmp zip import",
        "download zip to chars repo",
        "merge bam txt from zip",
    ]

    @classmethod
    def INPUT_TYPES(cls):
        # zip_url: where to fetch the archive; hf_token: optional explicit
        # token (falls back to HF_TOKEN-style env vars in _resolve_hf_token).
        return {
            "required": {
                "zip_url": (
                    "STRING",
                    {
                        "default": "https://huggingface.co/saliacoel/tmp/resolve/main/zipped_textfiles.zip",
                        "multiline": False,
                    },
                ),
                "hf_token": (
                    "STRING",
                    {
                        "default": "",
                        "multiline": False,
                        "placeholder": "hf_... (leave blank to use HF_TOKEN env var)",
                    },
                ),
            }
        }

    def apply_zip(self, zip_url, hf_token):
        """Run the import: download, merge BAM tags, commit everything.

        Returns (summary, character_count, uploaded_text_file_count,
        bam_url, commit_url). Raises ValueError for bad input / empty
        archives and RuntimeError for download or dependency failures.
        """
        token = _resolve_hf_token(hf_token)
        zip_bytes = _download_url_bytes(zip_url, token)
        character_files = _load_zip_character_files(zip_bytes)
        bam_updates, repo_text_files = _extract_bam_updates_and_repo_files(character_files)

        if not bam_updates and not repo_text_files:
            raise ValueError("No usable .txt files were found in the zip archive.")

        # Merge the new tags into the current remote BAM.txt contents.
        current_bam_text = _download_current_bam_text(token)
        new_bam_text, updated_existing, appended_new_lines, duplicate_skips = _apply_bam_updates(
            current_bam_text,
            bam_updates,
        )

        # One commit carries every character .txt plus the rewritten BAM.txt.
        commit_files = list(repo_text_files)
        commit_files.append((TARGET_BAM_PATH, new_bam_text.encode("utf-8")))

        character_count = len(character_files)
        uploaded_text_file_count = len(repo_text_files)
        commit_message = (
            f"ComfyUI import {character_count} character folders from zip "
            f"({uploaded_text_file_count} text files + {TARGET_BAM_PATH})"
        )
        commit_url = _commit_files(commit_files, token, commit_message)

        summary = (
            f"Processed {character_count} character folders. "
            f"Uploaded {uploaded_text_file_count} non-bam text files. "
            f"BAM updates: {len(bam_updates)} tags processed, "
            f"{updated_existing} existing lines extended, "
            f"{appended_new_lines} new lines appended, "
            f"{duplicate_skips} duplicate tags skipped."
        )

        return (
            summary,
            int(character_count),
            int(uploaded_text_file_count),
            _repo_file_url(TARGET_BAM_PATH),
            commit_url,
        )
401
+
402
+
403
# ComfyUI registration tables: node id -> class, and node id -> display label.
NODE_CLASS_MAPPINGS = {
    "Salia_Import_TMP_zip_to_chars": Salia_Import_TMP_zip_to_chars,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "Salia_Import_TMP_zip_to_chars": "Salia_Import_TMP_zip_to_chars",
}

# Explicit public API of this module.
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]