bobbypaton committed on
Commit ·
233f6d4
0
Parent(s):
Initial CASCADE HF Space deployment
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +3 -0
- .gitignore +8 -0
- DEPLOYMENT_GUIDE.md +207 -0
- Dockerfile +26 -0
- NMR_Prediction/__init__.py +0 -0
- NMR_Prediction/apply.py +255 -0
- NMR_Prediction/genConf.py +243 -0
- NMR_Prediction/preprocessor.p +3 -0
- NMR_Prediction/schnet_edgeupdate/best_model.hdf5 +3 -0
- NMR_Prediction/valid.py +9 -0
- README.md +32 -0
- app.py +178 -0
- nfp/__init__.py +15 -0
- nfp/layers/__init__.py +3 -0
- nfp/layers/layers.py +445 -0
- nfp/layers/utils.py +90 -0
- nfp/layers/wrappers.py +45 -0
- nfp/models/__init__.py +2 -0
- nfp/models/losses.py +19 -0
- nfp/models/models.py +12 -0
- nfp/preprocessing/__init__.py +4 -0
- nfp/preprocessing/features.py +128 -0
- nfp/preprocessing/preprocessor.py +734 -0
- nfp/preprocessing/scaling.py +40 -0
- nfp/preprocessing/sequence.py +115 -0
- nfp/preprocessing/test.py +734 -0
- requirements.txt +24 -0
- setup_assets.sh +50 -0
- start.sh +15 -0
- static/JSmol.min.js +0 -0
- static/css/doc.css +276 -0
- static/images/cascadebanner.png +0 -0
- static/images/custom-icon.png +0 -0
- static/j2s/J/Jmol.properties +3 -0
- static/j2s/J/adapter/readers/aflow/AFLOWReader.js +190 -0
- static/j2s/J/adapter/readers/cif/Cif2DataParser.js +207 -0
- static/j2s/J/adapter/readers/cif/Cif2Reader.js +8 -0
- static/j2s/J/adapter/readers/cif/CifReader.js +1226 -0
- static/j2s/J/adapter/readers/cif/MMCifReader.js +709 -0
- static/j2s/J/adapter/readers/cif/MMCifValidationParser.js +132 -0
- static/j2s/J/adapter/readers/cif/MMTFReader.js +286 -0
- static/j2s/J/adapter/readers/cif/MSCifParser.js +383 -0
- static/j2s/J/adapter/readers/cif/MSRdr.js +593 -0
- static/j2s/J/adapter/readers/cif/Subsystem.js +105 -0
- static/j2s/J/adapter/readers/molxyz/Mol3DReader.js +8 -0
- static/j2s/J/adapter/readers/molxyz/MolReader.js +306 -0
- static/j2s/J/adapter/readers/molxyz/V3000Rdr.js +158 -0
- static/j2s/J/adapter/readers/molxyz/XyzReader.js +80 -0
- static/j2s/J/adapter/readers/more/BinaryDcdReader.js +152 -0
- static/j2s/J/adapter/readers/more/ForceFieldReader.js +93 -0
.gitattributes
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.hdf5 filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.p filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*.egg-info/
|
| 4 |
+
.env
|
| 5 |
+
.DS_Store
|
| 6 |
+
*.sqlite3
|
| 7 |
+
results/
|
| 8 |
+
*.tar.gz
|
DEPLOYMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CASCADE → Hugging Face Spaces: Deployment Guide
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
The HF Space replaces Django with Flask but keeps the exact same Redis job-queue
|
| 6 |
+
pattern and frontend JS. The app is mounted at `/cascade_v1/` so the live URL is:
|
| 7 |
+
|
| 8 |
+
```
|
| 9 |
+
https://patonlab-cascade.hf.space/cascade_v1/predict/
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
The Space is named `cascade` on HF — the `_v1` is a subpath in Flask, not the repo name.
|
| 13 |
+
This means you can add `cascade_v2` later under the same Space without creating a new repo.
|
| 14 |
+
|
| 15 |
+
```
|
| 16 |
+
hf_space/
|
| 17 |
+
├── Dockerfile # Python 3.7, Redis, TF1.15, Keras 2.2.5
|
| 18 |
+
├── requirements.txt
|
| 19 |
+
├── start.sh # Redis → worker → gunicorn
|
| 20 |
+
├── app.py # Flask Blueprint mounted at /cascade_v1
|
| 21 |
+
├── worker.py # Redis consumer, runs GNN inference
|
| 22 |
+
├── setup_assets.sh # One-time script: copies binaries from nova app
|
| 23 |
+
├── README.md # HF Space card (YAML front-matter required)
|
| 24 |
+
├── .gitattributes # Git LFS tracking for .hdf5, .p and .tar.gz files
|
| 25 |
+
├── .gitignore
|
| 26 |
+
├── NMR_Prediction/
|
| 27 |
+
│ ├── __init__.py
|
| 28 |
+
│ ├── apply.py # ← copied + path-fixed by setup_assets.sh
|
| 29 |
+
│ ├── genConf.py # ← copied by setup_assets.sh
|
| 30 |
+
│ ├── valid.py # new (simple RDKit SMILES check)
|
| 31 |
+
│ ├── preprocessor.p # ← copied by setup_assets.sh
|
| 32 |
+
│ └── schnet_edgeupdate/
|
| 33 |
+
│ └── best_model.hdf5 # ← copied by setup_assets.sh (31 MB)
|
| 34 |
+
├── nfp/ # ← copied by setup_assets.sh
|
| 35 |
+
├── static/ # ← copied by setup_assets.sh
|
| 36 |
+
│ ├── main.css
|
| 37 |
+
│ ├── sketch.css
|
| 38 |
+
│ ├── JSmol.min.js
|
| 39 |
+
│ ├── images/
|
| 40 |
+
│ ├── j2s/
|
| 41 |
+
│ ├── jquery/
|
| 42 |
+
│ ├── js/
|
| 43 |
+
│ └── jsme/
|
| 44 |
+
└── templates/
|
| 45 |
+
└── cascade/
|
| 46 |
+
├── base.html
|
| 47 |
+
├── home.html
|
| 48 |
+
├── predict.html
|
| 49 |
+
├── results.html
|
| 50 |
+
└── about.html
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
## Step 1 — Copy assets from the nova app
|
| 56 |
+
|
| 57 |
+
Run the setup script (copies model weights, inference code, static files, and
|
| 58 |
+
applies the preprocessor path fix automatically):
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
cd ~/Documents/huggingface/cascade/hf_space
|
| 62 |
+
bash setup_assets.sh
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
You should see a ✓ for each of the 5 asset groups. Afterwards confirm:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
ls NMR_Prediction/ # apply.py genConf.py preprocessor.p schnet_edgeupdate/ valid.py __init__.py
|
| 69 |
+
ls nfp/ # __init__.py layers/ models/ preprocessing/
|
| 70 |
+
ls static/ # main.css JSmol.min.js j2s/ jsme/ jquery/ js/ images/
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## Step 2 — Test locally with Docker
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
cd ~/Documents/huggingface/cascade/hf_space
|
| 79 |
+
|
| 80 |
+
# Build (takes ~5 min first time — TF1.15 + RDKit are large)
|
| 81 |
+
docker build -t cascade-local .
|
| 82 |
+
|
| 83 |
+
# Run
|
| 84 |
+
docker run -p 7860:7860 cascade-local
|
| 85 |
+
|
| 86 |
+
# Open in browser
|
| 87 |
+
open http://localhost:7860/cascade_v1/predict/
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
**What to check:**
|
| 91 |
+
- Page loads, JSME editor opens
|
| 92 |
+
- Submitting `CC(=O)O` (acetic acid) returns a task_id (check browser Network tab)
|
| 93 |
+
- `check_task` eventually returns the results HTML with 2D SVG and shift table
|
| 94 |
+
- Worker logs appear in the Docker terminal: `Processing task ...`
|
| 95 |
+
|
| 96 |
+
**Common first-run issues:**
|
| 97 |
+
|
| 98 |
+
| Symptom | Fix |
|
| 99 |
+
|---|---|
|
| 100 |
+
| `ModuleNotFoundError: nfp` | Check `nfp/` is in the root of hf_space, not nested inside another folder |
|
| 101 |
+
| `OSError: preprocessor.p not found` | Re-run `setup_assets.sh` — the path fix may not have applied |
|
| 102 |
+
| `keras load_model` custom layer error | Check custom_objects in worker.py match the nfp layer class names exactly |
|
| 103 |
+
| Redis connection refused | Redis takes ~1 s to start; the worker retries automatically — wait a moment |
|
| 104 |
+
| 500 on `/cascade_v1/predict/` | Check `templates/cascade/predict.html` exists |
|
| 105 |
+
| JSME editor blank | Check `static/jsme/jsme.nocache.js` was copied across |
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
## Step 3 — Create the Hugging Face Space
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
# Install HF CLI if needed
|
| 113 |
+
pip install huggingface_hub
|
| 114 |
+
|
| 115 |
+
# Login (needs a write token from hf.co/settings/tokens)
|
| 116 |
+
huggingface-cli login
|
| 117 |
+
|
| 118 |
+
# Create the Space — named 'cascade', Docker SDK, public, under patonlab org
|
| 119 |
+
huggingface-cli repo create cascade \
|
| 120 |
+
--type space \
|
| 121 |
+
--space-sdk docker \
|
| 122 |
+
--organization patonlab
|
| 123 |
+
|
| 124 |
+
# This creates: https://huggingface.co/spaces/patonlab/cascade
|
| 125 |
+
# Live URL will be: https://patonlab-cascade.hf.space/cascade_v1/predict/
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
## Step 4 — Push to Hugging Face
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
cd ~/Documents/huggingface/cascade/hf_space
|
| 134 |
+
|
| 135 |
+
git init
|
| 136 |
+
git lfs install # Required — .hdf5 and .p are tracked via Git LFS
|
| 137 |
+
|
| 138 |
+
# .gitattributes is already written; just add it
|
| 139 |
+
git add .gitattributes
|
| 140 |
+
git remote add origin https://huggingface.co/spaces/patonlab/cascade
|
| 141 |
+
|
| 142 |
+
git add .
|
| 143 |
+
git commit -m "Initial CASCADE HF Space deployment"
|
| 144 |
+
git push origin main   # if your local branch is named 'master', run 'git branch -M main' first
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
The Space builds automatically. Build logs at:
|
| 148 |
+
`https://huggingface.co/spaces/patonlab/cascade` → Logs tab
|
| 149 |
+
|
| 150 |
+
Build time is typically **8–12 minutes** (TF1.15 + RDKit are large).
|
| 151 |
+
|
| 152 |
+
---
|
| 153 |
+
|
| 154 |
+
## Step 5 — Verify the live Space
|
| 155 |
+
|
| 156 |
+
Once the build is green:
|
| 157 |
+
|
| 158 |
+
1. `https://patonlab-cascade.hf.space/cascade_v1/predict/` — page loads
|
| 159 |
+
2. Submit `CC(=O)O` (acetic acid) — result in ~10–15 s
|
| 160 |
+
3. Submit `CC1=CC(=CC(=C1)O)C` — result in ~30–45 s
|
| 161 |
+
4. Test the JSME drawing tool → OK → predicted shifts labelled on 2D structure
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
## Step 6 — Update the GitHub README
|
| 166 |
+
|
| 167 |
+
Update `patonlab/CASCADE` on GitHub to point to the new URL:
|
| 168 |
+
|
| 169 |
+
```markdown
|
| 170 |
+
The production web server can be found here:
|
| 171 |
+
https://patonlab-cascade.hf.space/cascade_v1/predict/
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## Ongoing maintenance
|
| 177 |
+
|
| 178 |
+
**Pushing updates:**
|
| 179 |
+
```bash
|
| 180 |
+
cd ~/Documents/huggingface/cascade/hf_space
|
| 181 |
+
git add . && git commit -m "your message" && git push origin main
|
| 182 |
+
```
|
| 183 |
+
HF rebuilds automatically on each push.
|
| 184 |
+
|
| 185 |
+
**Checking logs:** HF Space → Logs tab (worker output streams there in real time)
|
| 186 |
+
|
| 187 |
+
**Cold starts:** The free HF tier sleeps after inactivity. First request after sleep
|
| 188 |
+
takes ~30 s. Upgrade to HF Pro ($9/mo) for persistent Spaces if needed.
|
| 189 |
+
|
| 190 |
+
**Redis:** Results have a 1-hour TTL and do not persist across container restarts.
|
| 191 |
+
This is fine for interactive use.
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
## What changed from the original Django app
|
| 196 |
+
|
| 197 |
+
| Original (nova) | New HF Space |
|
| 198 |
+
|---|---|
|
| 199 |
+
| Django 2.1 | Flask 2.0 with Blueprint at `/cascade_v1` |
|
| 200 |
+
| `uwsgi` | `gunicorn` |
|
| 201 |
+
| `django.template` tags | Plain Jinja2 (`/static/...` paths) |
|
| 202 |
+
| `ALLOWED_HOSTS = ['129.82.62.62']` | Flask `HOST=0.0.0.0` |
|
| 203 |
+
| Hard-coded `SECRET_KEY` | Not needed (no sessions/auth) |
|
| 204 |
+
| `db.sqlite3` | Removed (unused) |
|
| 205 |
+
| `cascade/preprocessor.p` path | `NMR_Prediction/preprocessor.p` |
|
| 206 |
+
| Model loaded in Django views at startup | Loaded once in worker.py at startup |
|
| 207 |
+
| Manual file copies + sed fix | Automated via `setup_assets.sh` |
|
Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM --platform=linux/amd64 python:3.7-slim
|
| 2 |
+
|
| 3 |
+
# System dependencies
|
| 4 |
+
RUN apt-get update && apt-get install -y \
|
| 5 |
+
build-essential \
|
| 6 |
+
redis-server \
|
| 7 |
+
libxrender1 \
|
| 8 |
+
libxext6 \
|
| 9 |
+
libgl1-mesa-glx \
|
| 10 |
+
libglib2.0-0 \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
WORKDIR /app
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
COPY requirements.txt .
|
| 17 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 18 |
+
|
| 19 |
+
# Copy all application code
|
| 20 |
+
COPY . .
|
| 21 |
+
|
| 22 |
+
# HF Spaces requires port 7860
|
| 23 |
+
EXPOSE 7860
|
| 24 |
+
|
| 25 |
+
# Startup: Redis + worker + Flask
|
| 26 |
+
CMD ["bash", "start.sh"]
|
NMR_Prediction/__init__.py
ADDED
|
File without changes
|
NMR_Prediction/apply.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys,io,os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import gzip, pickle, argparse, warnings
|
| 5 |
+
import pickle
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
|
| 10 |
+
from rdkit import Chem
|
| 11 |
+
from rdkit.Chem import AllChem
|
| 12 |
+
from rdkit.Chem import ForwardSDMolSupplier
|
| 13 |
+
|
| 14 |
+
from itertools import islice
|
| 15 |
+
|
| 16 |
+
from nfp.preprocessing import MolAPreprocessor, GraphSequence
|
| 17 |
+
from .genConf import genConf
|
| 18 |
+
|
| 19 |
+
import keras
|
| 20 |
+
import keras.backend as K
|
| 21 |
+
|
| 22 |
+
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler
|
| 23 |
+
|
| 24 |
+
from keras.layers import (Input, Embedding, Dense, BatchNormalization,
|
| 25 |
+
Concatenate, Multiply, Add)
|
| 26 |
+
|
| 27 |
+
from keras.models import Model, load_model
|
| 28 |
+
|
| 29 |
+
from nfp.layers import (MessageLayer, GRUStep, Squeeze, EdgeNetwork,
|
| 30 |
+
ReduceBondToPro, ReduceBondToAtom,
|
| 31 |
+
GatherAtomToBond, ReduceAtomToPro)
|
| 32 |
+
from nfp.models import GraphModel
|
| 33 |
+
|
| 34 |
+
def to_C(atom):
    """Return True when at least one neighbor of ``atom`` is a carbon.

    ``atom`` must expose ``GetNeighbors()``, and each neighbor must expose
    ``GetAtomicNum()``; a neighbor with atomic number 6 counts as carbon.
    An atom without neighbors yields False.
    """
    # any() stops at the first carbon instead of materializing every
    # neighbor's atomic number first.
    return any(nbr.GetAtomicNum() == 6 for nbr in atom.GetNeighbors())
|
| 40 |
+
|
| 41 |
+
def Mol_iter(df):
    """Yield ``(Mol, atom_index)`` tuples for every row of ``df``, in row order.

    ``df`` must provide the columns ``'Mol'`` and ``'atom_index'``; any other
    columns are ignored.
    """
    # Walk the two needed columns in lockstep rather than building a full
    # row Series per record with iterrows().
    for mol, atom_index in zip(df['Mol'], df['atom_index']):
        yield (mol, atom_index)
|
| 44 |
+
|
| 45 |
+
def preprocess_C(mols, preprocessor, keep_all_cf=False):
    """Generate conformers for ``mols`` and build model inputs for their carbons.

    For every molecule ``genConf`` is called with fixed settings
    (``rms=-1, nc=200, efilter=10.0, rmspost=0.5``).  If it raises, the
    failure is printed, ``None`` is recorded for that molecule and processing
    continues with the next one.

    Args:
        mols: iterable of RDKit molecules.
        preprocessor: object whose ``predict`` method consumes the
            ``(Mol, atom_index)`` pairs produced by ``Mol_iter``.
        keep_all_cf: when true, one molecule copy is emitted per entry
            returned by ``genConf``.

    Returns:
        ``(inputs, df, mols_conf)``: the preprocessor output, a DataFrame with
        the columns ``mol_id, Mol, n_atoms, atom_index, relative_E, cf_id``
        (one row per non-None entry of ``mols_conf``), and the list of
        per-conformer molecules (``None`` marks a failed molecule).
    """
    mols_conf = []
    for i, m in enumerate(mols):
        try:
            mol, ids, _ = genConf(m, rms=-1, nc=200, efilter=10.0, rmspost=0.5)
        except Exception as e:
            # Deliberate best-effort: one bad molecule must not abort the
            # batch, but the cause is reported instead of being discarded.
            print("cannot generate FF conformers: {}".format(e))
            mols_conf.append(None)
            continue

        if keep_all_cf:
            # Each entry of ``ids`` is indexed as entry[0] / entry[1];
            # presumably (energy, conformer id) -- confirm against genConf.
            for entry in ids:
                m_i = Chem.RWMol(mol)
                m_i.SetProp('E', str(entry[0]))
                m_i.SetProp('_Name', '{}_{}'.format(i, entry[1]))
                # Overwrite the copy's default conformer with the coordinates
                # of the conformer this copy represents.
                source = mol.GetConformer(entry[1])
                target = m_i.GetConformer()
                for ia in range(mol.GetNumAtoms()):
                    target.SetAtomPosition(ia, source.GetAtomPosition(ia))
                mols_conf.append(m_i)
        else:
            # NOTE(review): this branch only tags the *input* molecule and
            # never adds anything to ``mols_conf``, so with keep_all_cf=False
            # the molecule contributes no rows.  Behavior kept as-is; confirm
            # whether an append was intended.
            first = ids[0]
            m.SetProp('E', str(first[0]))
            m.SetProp('_Name', '{}_{}'.format(i, first[1]))

    rows = []
    for m in mols_conf:
        if m is None:
            continue
        energy = float(m.GetProp('E'))
        # '_Name' was written above as "<molecule index>_<conformer id>".
        m_id, cf_id = [int(x) for x in m.GetProp('_Name').split('_')]
        c_index = np.array(
            [a.GetIdx() for a in m.GetAtoms() if a.GetAtomicNum() == 6]
        ).astype(int)
        rows.append([m_id, m, m.GetNumAtoms(), c_index, energy, cf_id])

    df = pd.DataFrame(
        rows,
        columns=['mol_id', 'Mol', 'n_atoms', 'atom_index', 'relative_E', 'cf_id'])

    inputs = preprocessor.predict(Mol_iter(df))

    return inputs, df, mols_conf
|
| 86 |
+
|
| 87 |
+
class RBFSequence(GraphSequence):
    """GraphSequence variant that adds an RBF expansion of the distances."""

    def process_data(self, batch_data):
        """Add ``'distance_rbf'``, shift ``'atom_index'`` and drop unused keys.

        ``batch_data`` is modified in place and also returned.
        """
        batch_data['distance_rbf'] = self.rbf_expansion(batch_data['distance'])

        stacked = self._compute_stacked_offsets(
            batch_data['n_pro'], batch_data['n_atom'])

        # Entries with a negative atom_index receive a zero offset.
        stacked = np.where(batch_data['atom_index'] >= 0, stacked, 0)
        batch_data['atom_index'] += stacked

        for key in ('n_atom', 'n_bond', 'distance'):
            del batch_data[key]
        return batch_data

    def _compute_stacked_offsets(self, sizes, repeats):
        """Repeat the exclusive running sum of ``sizes`` according to ``repeats``."""
        starts = np.cumsum(np.hstack([0, sizes[:-1]]))
        return np.repeat(starts, repeats)

    def rbf_expansion(self, distances, mu=0, delta=0.1, kmax=256):
        """Return ``exp(-(d - (-mu + delta*k))**2 / delta)`` for ``k`` in ``0..kmax-1``.

        The result has one row per distance and ``kmax`` columns.
        """
        centers = -mu + delta * np.arange(0, kmax)
        column = np.atleast_2d(distances).T
        logits = -(column - centers)**2 / delta
        return np.exp(logits)
|
| 109 |
+
|
| 110 |
+
def evaluate_C(inputs, preprocessor, model):
    """Run ``model`` over ``inputs`` in batches of 32 and return a flat list.

    ``preprocessor`` is accepted for interface compatibility but is not used.
    Each batch produced by ``RBFSequence`` is passed to
    ``model.predict_on_batch``; the outputs are concatenated and collected in
    batch order.
    """
    sequence = RBFSequence(inputs, batch_size=32)

    predicted = []
    for batch in sequence:
        batch_out = model.predict_on_batch(batch)
        predicted.extend(np.concatenate(batch_out))

    return predicted
|
| 122 |
+
|
| 123 |
+
def predict_NMR_C(smiles, model):
    """Predict conformer-weighted shifts for the carbon atoms of ``smiles``.

    The SMILES is parsed, embedded, given explicit hydrogens and expanded into
    conformers by ``preprocess_C``.  ``model`` is evaluated on every conformer
    and, per atom, the predictions are averaged with the weights
    ``exp(-relative_E / (0.001987 * 298.15))``.

    Args:
        smiles: SMILES string of the molecule.
        model: object exposing ``predict_on_batch`` (used by ``evaluate_C``).

    Returns:
        ``(mols, final, spread_df)``: the per-conformer molecules, a DataFrame
        with ``mol_id, atom_index, Shift`` (atom_index is 1-based, values
        rounded to 2 decimals) and the per-conformer table that was averaged.

    Raises:
        ValueError: if ``smiles`` cannot be parsed.
        RuntimeError: if no conformer inputs were produced.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # Fail with a clear message instead of an opaque error raised later
        # when None is handed to the embedding call.
        raise ValueError('Could not parse SMILES string: {!r}'.format(smiles))

    mols = [parsed]
    for m in mols:
        AllChem.EmbedMolecule(m, useRandomCoords=True)
    mols = [Chem.AddHs(m, addCoords=True) for m in mols]

    # Locate the pickle next to this module so loading does not depend on the
    # process working directory.  NOTE: pickle must only be used on this
    # bundled, trusted asset.
    preprocessor_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'preprocessor.p')
    with open(preprocessor_path, 'rb') as ft:
        preprocessor = pickle.load(ft)['preprocessor']

    inputs, df, mols = preprocess_C(mols, preprocessor, True)

    if len(inputs) == 0:
        raise RuntimeError('Failed to find any conformer for the given molecule')

    predicted = evaluate_C(inputs, preprocessor, model)

    # One row per (conformer, atom).  The empty seed frame is kept so the
    # resulting dtypes match the previous row-by-row accumulation; the frames
    # are concatenated once instead of re-copying the table per conformer.
    frames = [pd.DataFrame([], columns=['mol_id', 'atom_index', 'relative_E', 'cf_id'])]
    for _, r in df.iterrows():
        n_idx = len(r.atom_index)
        frames.append(pd.DataFrame(data={
            'mol_id': [r.mol_id] * n_idx,
            'atom_index': r.atom_index,
            'relative_E': [r.relative_E] * n_idx,
            'cf_id': [r.cf_id] * n_idx,
        }))
    spread_df = pd.concat(frames, sort=True)

    spread_df['predicted'] = predicted
    # Presumably R in kcal/(mol*K) times T in K -- confirm the energy unit.
    kT = 0.001987 * 298.15
    spread_df['b_weight'] = spread_df.relative_E.apply(lambda x: math.exp(-x/kT))

    grouped = spread_df.set_index(['mol_id', 'atom_index', 'cf_id']).groupby(level=[0, 1])
    final = []
    for (m_id, a_id), group in grouped:
        weighted_shift = (group.b_weight * group.predicted).sum() / group.b_weight.sum()
        final.append([m_id, a_id, weighted_shift])

    final = pd.DataFrame(final, columns=['mol_id', 'atom_index', 'Shift'])
    # Report atom indices 1-based.
    final['atom_index'] = final.atom_index.apply(lambda x: x+1)
    final = final.round(2).astype(dtype={'atom_index': 'int'})
    spread_df['atom_index'] = spread_df.atom_index.apply(lambda x: x+1)
    spread_df = spread_df.round(2)

    return (mols, final, spread_df)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def preprocess_H(mols, preprocessor, keep_all_cf=False):
    """Generate conformers for ``mols`` and build model inputs for their hydrogens.

    For every molecule ``genConf`` is called with fixed settings
    (``rms=-1, nc=200, efilter=10.0, rmspost=0.5``).  If it raises, the
    failure is printed, ``None`` is recorded for that molecule and processing
    continues with the next one.

    Args:
        mols: iterable of RDKit molecules.
        preprocessor: object whose ``predict`` method consumes the
            ``(Mol, atom_index)`` pairs produced by ``Mol_iter``.
        keep_all_cf: when true, one molecule copy is emitted per entry
            returned by ``genConf``.

    Returns:
        ``(inputs, df, mols_conf)``: the preprocessor output, a DataFrame with
        the columns ``mol_id, Mol, n_atoms, atom_index, relative_E, cf_id``
        (one row per non-None entry of ``mols_conf``), and the list of
        per-conformer molecules (``None`` marks a failed molecule).
    """
    mols_conf = []
    for i, m in enumerate(mols):
        try:
            mol, ids, _ = genConf(m, rms=-1, nc=200, efilter=10.0, rmspost=0.5)
        except Exception as e:
            # Deliberate best-effort: one bad molecule must not abort the
            # batch, but the cause is reported instead of being discarded.
            print("cannot generate FF conformers: {}".format(e))
            mols_conf.append(None)
            continue

        if keep_all_cf:
            # Each entry of ``ids`` is indexed as entry[0] / entry[1];
            # presumably (energy, conformer id) -- confirm against genConf.
            for entry in ids:
                m_i = Chem.RWMol(mol)
                m_i.SetProp('E', str(entry[0]))
                m_i.SetProp('_Name', '{}_{}'.format(i, entry[1]))
                # Overwrite the copy's default conformer with the coordinates
                # of the conformer this copy represents.
                source = mol.GetConformer(entry[1])
                target = m_i.GetConformer()
                for ia in range(mol.GetNumAtoms()):
                    target.SetAtomPosition(ia, source.GetAtomPosition(ia))
                mols_conf.append(m_i)
        else:
            # NOTE(review): this branch only tags the *input* molecule and
            # never adds anything to ``mols_conf``, so with keep_all_cf=False
            # the molecule contributes no rows.  Behavior kept as-is; confirm
            # whether an append was intended.
            first = ids[0]
            m.SetProp('E', str(first[0]))
            m.SetProp('_Name', '{}_{}'.format(i, first[1]))

    rows = []
    for m in mols_conf:
        if m is None:
            continue
        energy = float(m.GetProp('E'))
        # '_Name' was written above as "<molecule index>_<conformer id>".
        m_id, cf_id = [int(x) for x in m.GetProp('_Name').split('_')]
        h_index = np.array(
            [a.GetIdx() for a in m.GetAtoms() if a.GetAtomicNum() == 1]
        ).astype(int)
        rows.append([m_id, m, m.GetNumAtoms(), h_index, energy, cf_id])

    df = pd.DataFrame(
        rows,
        columns=['mol_id', 'Mol', 'n_atoms', 'atom_index', 'relative_E', 'cf_id'])

    inputs = preprocessor.predict(Mol_iter(df))

    return inputs, df, mols_conf
|
| 206 |
+
|
| 207 |
+
def evaluate_H(inputs, preprocessor, model):
    """Run ``model`` over ``inputs`` in batches of 32 and return a flat list.

    ``preprocessor`` is accepted for interface compatibility but is not used.
    Each batch produced by ``RBFSequence`` is passed to
    ``model.predict_on_batch``; the outputs are concatenated and collected in
    batch order.
    """
    batches = RBFSequence(inputs, batch_size=32)

    predicted = []
    for batch in batches:
        flat = np.concatenate(model.predict_on_batch(batch))
        predicted.extend(flat)

    return predicted
|
| 219 |
+
|
| 220 |
+
def predict_NMR_H(smiles, model):
    """Predict conformer-weighted shifts for the hydrogen atoms of ``smiles``.

    The SMILES is parsed, embedded, given explicit hydrogens and expanded into
    conformers by ``preprocess_H``.  ``model`` is evaluated on every conformer
    and, per atom, the predictions are averaged with the weights
    ``exp(-relative_E / (0.001987 * 298.15))``.

    Args:
        smiles: SMILES string of the molecule.
        model: object exposing ``predict_on_batch`` (used by ``evaluate_H``).

    Returns:
        ``(mols, final, spread_df)``: the per-conformer molecules, a DataFrame
        with ``mol_id, atom_index, Shift`` (atom_index is 1-based, values
        rounded to 2 decimals) and the per-conformer table that was averaged.

    Raises:
        ValueError: if ``smiles`` cannot be parsed.
        RuntimeError: if no conformer inputs were produced.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # Fail with a clear message instead of an opaque error raised later
        # when None is handed to the embedding call.
        raise ValueError('Could not parse SMILES string: {!r}'.format(smiles))

    mols = [parsed]
    for m in mols:
        AllChem.EmbedMolecule(m, useRandomCoords=True)
    mols = [Chem.AddHs(m, addCoords=True) for m in mols]

    # Locate the pickle next to this module so loading does not depend on the
    # process working directory.  NOTE: pickle must only be used on this
    # bundled, trusted asset.
    preprocessor_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'preprocessor.p')
    with open(preprocessor_path, 'rb') as ft:
        preprocessor = pickle.load(ft)['preprocessor']

    inputs, df, mols = preprocess_H(mols, preprocessor, True)

    # Same guard as the carbon predictor: report a conformer failure
    # explicitly instead of returning empty tables.
    if len(inputs) == 0:
        raise RuntimeError('Failed to find any conformer for the given molecule')

    predicted = evaluate_H(inputs, preprocessor, model)

    # One row per (conformer, atom).  The empty seed frame is kept so the
    # resulting dtypes match the previous row-by-row accumulation; the frames
    # are concatenated once instead of re-copying the table per conformer.
    frames = [pd.DataFrame([], columns=['mol_id', 'atom_index', 'relative_E', 'cf_id'])]
    for _, r in df.iterrows():
        n_idx = len(r.atom_index)
        frames.append(pd.DataFrame(data={
            'mol_id': [r.mol_id] * n_idx,
            'atom_index': r.atom_index,
            'relative_E': [r.relative_E] * n_idx,
            'cf_id': [r.cf_id] * n_idx,
        }))
    spread_df = pd.concat(frames, sort=True)

    spread_df['predicted'] = predicted
    # Presumably R in kcal/(mol*K) times T in K -- confirm the energy unit.
    kT = 0.001987 * 298.15
    spread_df['b_weight'] = spread_df.relative_E.apply(lambda x: math.exp(-x/kT))

    grouped = spread_df.set_index(['mol_id', 'atom_index', 'cf_id']).groupby(level=[0, 1])
    final = []
    for (m_id, a_id), group in grouped:
        weighted_shift = (group.b_weight * group.predicted).sum() / group.b_weight.sum()
        final.append([m_id, a_id, weighted_shift])

    final = pd.DataFrame(final, columns=['mol_id', 'atom_index', 'Shift'])
    # Report atom indices 1-based.
    final['atom_index'] = final.atom_index.apply(lambda x: x+1)
    final = final.round(2).astype(dtype={'atom_index': 'int'})
    spread_df['atom_index'] = spread_df.atom_index.apply(lambda x: x+1)
    spread_df = spread_df.round(2)

    return (mols, final, spread_df)
|
NMR_Prediction/genConf.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
from __future__ import print_function, absolute_import
|
| 3 |
+
|
| 4 |
+
#######################################################################
|
| 5 |
+
# Molecular conformer generator in progress
|
| 6 |
+
# genConf.py -isdf file_input.sdf -osdf file_output.sdf
|
| 7 |
+
# -n number_of_conformers (optional, if not specified is based
|
| 8 |
+
# on the number of rotatable bonds) -rtpre rms_threshold_pre_opt(optional)
|
| 9 |
+
# -rtpost rms_threshold_post_opt(optional) -e energy_window (optional, Kcal/mol)
|
| 10 |
+
# -t number_of_threads (defaults to 1 if not specified)
|
| 11 |
+
#######################################################################
|
| 12 |
+
|
| 13 |
+
## known issues / to-do list
|
| 14 |
+
## logging doesn't work properly
|
| 15 |
+
## need to print out rotatable bond atoms for reference
|
| 16 |
+
## comparison of printing vs. old full monte - more options
|
| 17 |
+
## comparison of printing vs. GoodVibes
|
| 18 |
+
## tabulation of results in addition to sdf output file
|
| 19 |
+
## pythonify where possible
|
| 20 |
+
## currently RMS but torsion_list would be better?
|
| 21 |
+
|
| 22 |
+
from rdkit import Chem
|
| 23 |
+
from rdkit.Chem import AllChem
|
| 24 |
+
from concurrent import futures
|
| 25 |
+
import argparse, logging, os, sys, time, copy
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# PHYSICAL CONSTANTS
|
| 29 |
+
GAS_CONSTANT, PLANCK_CONSTANT, BOLTZMANN_CONSTANT, SPEED_OF_LIGHT, AVOGADRO_CONSTANT, AMU_to_KG, atmos = 8.3144621, 6.62606957e-34, 1.3806488e-23, 2.99792458e10, 6.0221415e23, 1.66053886E-27, 101.325
|
| 30 |
+
# UNIT CONVERSION
|
| 31 |
+
j_to_au = 4.184 * 627.509541 * 1000.0
|
| 32 |
+
|
| 33 |
+
# version number
|
| 34 |
+
__version__ = "1.0.1"
|
| 35 |
+
|
| 36 |
+
# Formatted output to command line and log file
|
| 37 |
+
class Logger:
    """Write messages to the module logger and to the file ``<filein>.<ext>``.

    The file is opened for writing in ``__init__`` and stays open until
    ``Finalize`` is called.  The class can also be used as a context manager,
    in which case the file is closed when the ``with`` block exits.
    """

    # Designated initializer
    def __init__(self, filein, ext):
        path = filein + "." + ext
        # Ask before overwriting an existing log file; "n"/"no" aborts.
        if os.path.exists(path):
            var = input("\no Logger file %s already exists! OK to proceed? (Y/N) " % path)
            if var.lower().strip() in ("n", "no"):
                logger.error("\n OK. Exiting gracefully ...\n")
                sys.exit(1)
        # Create the log file at the input path
        self.log = open(path, 'w')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Close the file even when the block raised; never suppress the error.
        self.Finalize()
        return False

    # Write a message to the log
    def Write(self, message):
        """Send ``message`` to the module logger and append it, plus a newline, to the file."""
        logger.info(message)
        self.log.write(message + "\n")

    # Write a message only to the log and not to the terminal
    def Writeonlyfile(self, message):
        """Append ``message`` to the file exactly as given (no newline added)."""
        self.log.write(message)

    # Write a fatal error, finalize and terminate the program
    def Fatal(self, message):
        """Log ``message`` as an error, write it to the file, close it and exit with status 1."""
        logger.error(message+"\n")
        self.log.write(message + "\n")
        self.Finalize()
        sys.exit(1)

    # Finalize the log file
    def Finalize(self):
        """Close the log file."""
        self.log.close()
|
| 74 |
+
|
| 75 |
+
dashedline = " ------------------------------------------------------------------------------------------------------------------"
|
| 76 |
+
emptyline = " | |"
|
| 77 |
+
normaltermination = "\n ----------------- N O R M A L T E R M I N A T I O N ----------------\n"
|
| 78 |
+
leftcol=97
|
| 79 |
+
rightcol=12
|
| 80 |
+
|
| 81 |
+
asciiArt = " ___ ___ ___ ___ ___ ___ \n / /\\ /__/\\ /__/\\ / /\\ /__/\\ ___ / /\\\n / /:/_ \\ \\:\\ | |::\\ / /::\\ \\ \\:\\ / /\\ / /:/_\n / /:/ /\\ \\ \\:\\ ___ ___ ___ ___ | |:|:\\ / /:/\\:\\ \\ \\:\\ / /:/ / /:/ /\\\n / /:/ /:/___ \\ \\:\\ /__/\\ / /\\/__/\\ / /\\ __|__|:|\\:\\ / /:/ \\:\\ _____\\__\\:\\ / /:/ / /:/ /:/_\n/__/:/ /://__/\\ \\__\\:\\\\ \\:\\ / /:/\\ \\:\\ / /://__/::::| \\:\\/__/:/ \\__\\:\\/__/::::::::\\ / /::\\ /__/:/ /:/ /\\\n\\ \\:\\/:/ \\ \\:\\ / /:/ \\ \\:\\ /:/ \\ \\:\\ /:/ \\ \\:\\~~\\__\\/\\ \\:\\ / /:/\\ \\:\\~~\\~~\\//__/:/\\:\\\\ \\:\\/:/ /:/\n \\ \\::/ \\ \\:\\ /:/ \\ \\:\\/:/ \\ \\:\\/:/ \\ \\:\\ \\ \\:\\ /:/ \\ \\:\\ ~~~ \\__\\/ \\:\\\\ \\::/ /:/\n \\ \\:\\ \\ \\:\\/:/ \\ \\::/ \\ \\::/ \\ \\:\\ \\ \\:\\/:/ \\ \\:\\ \\ \\:\\\\ \\:\\/:/\n \\ \\:\\ \\ \\::/ \\__\\/ \\__\\/ \\ \\:\\ \\ \\::/ \\ \\:\\ \\__\\/ \\ \\::/\n \\__\\/ \\__\\/ \\__\\/ \\__\\/ \\__\\/ \\__\\/\n "
|
| 82 |
+
|
| 83 |
+
# algorithm to generate nc conformations
|
| 84 |
+
def genConf(m, nc, rms, efilter, rmspost):
    """Embed, minimise and filter conformers of molecule ``m``.

    Parameters
    ----------
    m : RDKit molecule; conformers are generated on it in place.
    nc : number of conformers to embed. A falsy value selects ``3**nr``,
        where ``nr`` is the rotatable-bond count.
    rms : RMS pruning threshold applied during embedding. A falsy value
        is replaced by -1 (no pruning).
    efilter : energy window passed to ``energy_filter``; the literal string
        ``"Y"`` skips the energy filter.
    rmspost : RMS threshold passed to ``postrmsd``; ``None`` skips it.

    Returns
    -------
    (mol, pairs, nr) where ``pairs`` is a list of ``(energy, conformer_id)``
    tuples and ``nr`` is the rotatable-bond count.

    Raises
    ------
    ValueError
        If embedding yields no conformer and the input carried no geometry
        to fall back on.
    """
    nr = int(AllChem.CalcNumRotatableBonds(m))
    Chem.AssignAtomChiralTagsFromStructure(m, replaceExistingTags=True)
    if not nc:
        nc = 3**nr

    print(dashedline+"\n | "+("FULL_MONTE search").ljust(leftcol)+("|").rjust(rightcol))
    print(" | o "+("EWIN: "+str(efilter)+" kcal/mol").ljust(leftcol)+("|").rjust(rightcol))
    print(" | o "+("MCNV: "+str(nr)+" ROTATABLE BONDS").ljust(leftcol)+("|").rjust(rightcol))
    # The estimate is reported with the same 3**nr rule used for the default
    # step count above (the original printed nr**3 here).
    print(" | o "+("STEP: "+str(nc)+" (ESTIMATED CONFORMER SPACE: "+str(3**nr)+")").ljust(leftcol)+("|").rjust(rightcol))
    print(dashedline+"\n")

    if not rms:
        rms = -1

    # Keep a copy of the input geometry (if any) so it can serve as a fallback.
    # NOTE(review): EmbedMultipleConfs is expected to clear existing conformers
    # by default, hence the copy is taken before embedding - confirm.
    fallback = Chem.Conformer(m.GetConformer()) if m.GetNumConformers() else None

    conf_ids = list(AllChem.EmbedMultipleConfs(
        m, numConfs=nc, pruneRmsThresh=rms, randomSeed=1,
        useExpTorsionAnglePrefs=True, useBasicKnowledge=True))

    if not conf_ids:
        if fallback is None:
            raise ValueError("conformer embedding failed and the input molecule has no coordinates")
        # AddConformer returns the id assigned to the new conformer; wrap it
        # in a list so the minimisation loop below can iterate over it.
        conf_ids = [m.AddConformer(fallback, assignId=True)]

    # The MMFF parameters do not depend on the conformer, so build them once.
    props = AllChem.MMFFGetMoleculeProperties(m, mmffVariant="MMFF94s")
    diz = []
    for conf_id in conf_ids:
        ff = AllChem.MMFFGetMoleculeForceField(m, props, confId=conf_id)
        ff.Minimize()
        diz.append((float(ff.CalcEnergy()), conf_id))

    if efilter != "Y":
        n, diz2 = energy_filter(m, diz, efilter)
    else:
        n, diz2 = m, diz

    if rmspost is not None and n.GetNumConformers() > 1:
        o, diz3 = postrmsd(n, diz2, rmspost)
    else:
        o, diz3 = n, diz2

    return o, diz3, nr
|
| 131 |
+
|
| 132 |
+
# filter conformers based on relative energy
|
| 133 |
+
def energy_filter(m, diz, efilter):
    """Keep the conformers whose energy lies within ``efilter`` of the minimum.

    Parameters
    ----------
    m : molecule holding the conformers referenced by ``diz``.
    diz : list of ``(energy, conformer_id)`` pairs. It is no longer sorted
        or truncated in place; the caller's list is left untouched.
    efilter : width of the energy window, added to the lowest energy.

    Returns
    -------
    (n, pairs) where ``n`` is a copy of ``m`` holding only the retained
    conformers and ``pairs`` is a list of ``(relative_energy, conformer_id)``
    tuples in ascending energy order.

    Raises
    ------
    IndexError
        If ``diz`` is empty.
    """
    print("o FILTERING CONFORMERS BY ENERGY CUTOFF: "+str(efilter)+" kcal/mol")
    ranked = sorted(diz)
    mini = float(ranked[0][0])
    sup = mini + efilter

    n = Chem.Mol(m)
    n.RemoveAllConformers()

    # The lowest-energy conformer is always retained, whatever the window.
    n.AddConformer(m.GetConformer(int(ranked[0][1])))
    kept = [(float(ranked[0][0]) - mini, int(ranked[0][1]))]

    for energy, conf_id in ranked[1:]:
        # The list is sorted, so the first conformer outside the window ends the scan.
        if not energy <= sup:
            break
        n.AddConformer(m.GetConformer(int(conf_id)))
        kept.append((float(energy - mini), int(conf_id)))

    print(" KEEPING "+str(len(kept))+" CONFORMERS")
    return n, kept
|
| 158 |
+
|
| 159 |
+
# filter conformers based on geometric RMS
|
| 160 |
+
def postrmsd(n, diz2, rmspost):
    """Drop conformers that are geometric duplicates of a lower-energy one.

    Conformers are visited in ascending energy order. One is retained only
    if its best RMS (computed on a hydrogen-stripped copy of ``n``) against
    every already retained conformer is at least ``rmspost``.

    Parameters
    ----------
    n : molecule holding the conformers referenced by ``diz2``.
    diz2 : list of ``(energy, conformer_id)`` pairs. It is no longer sorted
        or truncated in place; the caller's list is left untouched.
    rmspost : RMS threshold below which two conformers count as duplicates.

    Returns
    -------
    (o, pairs) where ``o`` is a copy of ``n`` (it still carries every
    conformer of ``n``) and ``pairs`` lists the retained
    ``(energy, conformer_id)`` tuples.

    Raises
    ------
    IndexError
        If ``diz2`` is empty.
    """
    print("o FILTERING CONFORMERS BY RMS: "+str(rmspost))
    ranked = sorted(diz2, key=lambda pair: pair[0])
    o = Chem.Mol(n)
    confidlist = [ranked[0][1]]
    enval = [ranked[0][0]]
    nh = Chem.RemoveHs(n)

    for energy, raw_id in ranked[1:]:
        confid = int(raw_id)
        # any() stops at the first retained conformer that is too similar.
        is_duplicate = any(
            AllChem.GetBestRMS(nh, nh, prbId=confid, refId=kept_id) < rmspost
            for kept_id in confidlist
        )
        if not is_duplicate:
            confidlist.append(confid)
            enval.append(float(energy))

    diz3 = list(zip(enval, confidlist))
    print(" KEEPING "+str(len(enval))+" CONFORMERS")
    return o, diz3
|
| 183 |
+
|
| 184 |
+
# conformational search / handles parallel threads if more than one structure is defined
|
| 185 |
+
def csearch(log, suppl, writer, args):
    """Run ``genConf`` on every molecule of ``suppl`` and write the conformers.

    Each readable molecule is submitted to a process pool sized by
    ``args.threads``; unreadable entries (``None``) are reported through
    ``log.Write`` and skipped. Results are written in submission order.
    ``writer`` is always closed before returning, including when a job
    raises.

    Parameters
    ----------
    log : object exposing ``Write(str)``.
    suppl : iterable of molecules (``None`` for entries that failed to parse).
    writer : object exposing ``write(mol, confId=...)`` and ``close()``.
    args : namespace providing ``threads``, ``nconf``, ``rmspre``, ``cutoff``,
        ``rmspost`` and ``printproperty``.
    """
    try:
        with futures.ProcessPoolExecutor(max_workers=args.threads) as executor:
            submitted = []  # (molecule name, future) in input order
            for mol_number, mol in enumerate(suppl, start=1):
                if mol is None:
                    log.Write("ERROR: Impossible to generate conformers for molecule number " + str(mol_number))
                    continue
                name = mol.GetProp('_Name')
                job = executor.submit(genConf, mol, args.nconf, args.rmspre, args.cutoff, args.rmspost)
                submitted.append((name, job))

            # result() blocks until the job is done, so no separate wait loop is needed.
            for name, job in submitted:
                mol, ids, nr = job.result()
                # The molecule comes back from a worker process; restore the name captured at submission.
                mol.SetProp('_Name', name)
                if args.printproperty:
                    mol.SetProp('Rotatable Bonds Number', str(nr))
                for en, conf_id in ids:
                    if args.printproperty:
                        mol.SetProp('ConfId', str(conf_id))
                        mol.SetProp('ConfEnergies', str(en)+ " kcal/mol")
                    writer.write(mol, confId=conf_id)
    finally:
        writer.close()
|
| 217 |
+
|
| 218 |
+
if __name__ == "__main__":
    # Command-line entry point: read an SDF file, run the conformational
    # search on every structure and write the conformers to another SDF file.
    parser = argparse.ArgumentParser(description='Molecular conformer generator')
    parser.add_argument('-isdf', required=True, help='sdf input file')
    parser.add_argument('-osdf', required=True, help='sdf output file')
    parser.add_argument('-nconf', type=int, required=False, help='number of conformers')
    parser.add_argument('-rmspre', type=float, required=False, help='rms threshold pre optimization')
    parser.add_argument('-rmspost', type=float, required=False, default=0.2, help='rms threshold post minimization')
    parser.add_argument('-cutoff', type=float, required=False, default=10.0, help='energy window')
    # NOTE(review): store_true combined with default=True means this flag can never be switched off.
    parser.add_argument('-printproperty', action='store_true', default=True, help='Print molecule properties (energy and rotable bond number)')
    parser.add_argument('-threads', type=int, required=False, default=1, help='number of threads')
    args = parser.parse_args()

    # Check that the input structure exists and has the correct format.
    # The original exited silently (status 0) in these cases; report the
    # problem instead so a bad path does not look like a successful run.
    if not os.path.exists(args.isdf):
        parser.error("input file not found: " + args.isdf)
    if os.path.splitext(args.isdf)[1] != ".sdf":
        parser.error("input file must have the .sdf extension: " + args.isdf)

    inp = args.isdf
    out = args.osdf
    filename = os.path.splitext(inp)[0]

    # Define input and outputs
    log = Logger(filename,"cs")
    writer = Chem.SDWriter(out)
    suppl = Chem.SDMolSupplier(inp, removeHs=False)
    log.Write("\no Extracting structure from "+args.isdf+" ...")

    # conformational search
    csearch(log, suppl, writer, args)

    end = time.strftime("%H:%M:%S", time.localtime())
    log.Write(asciiArt+end)
    log.Write(normaltermination)
    log.Finalize()
|
NMR_Prediction/preprocessor.p
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9160321e192de2a5ddf706be7b048a634b79e8d928feea6b1558151349bcb21
|
| 3 |
+
size 4556
|
NMR_Prediction/schnet_edgeupdate/best_model.hdf5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:056b2da63696eb4a8f319ed52c0a9f8e20a0971d5f471b9a89f8468ab9ee5180
|
| 3 |
+
size 32938712
|
NMR_Prediction/valid.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rdkit import Chem
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def validate_smiles(smiles: str) -> bool:
    """Tell whether ``smiles`` is a non-blank string that RDKit can parse.

    Falsy input and whitespace-only strings are rejected without calling
    RDKit; otherwise the stripped text is handed to ``Chem.MolFromSmiles``
    and the result is True when a molecule object comes back.
    """
    candidate = smiles.strip() if smiles else ""
    if not candidate:
        return False
    parsed = Chem.MolFromSmiles(candidate)
    return parsed is not None
|
README.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CASCADE NMR Predictor
|
| 3 |
+
emoji: 🧪
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# CASCADE – NMR Chemical Shift Prediction
|
| 13 |
+
|
| 14 |
+
**CASCADE** (**C**hemic**A**l **S**hift **CA**lculation with **DE**ep learning) predicts ¹H and ¹³C NMR chemical shifts for organic molecules using a stereochemically- and conformationally aware 3D Message Passing Graph Neural Network.
|
| 15 |
+
|
| 16 |
+
Developed in the [Paton group](https://patonlab.colostate.edu) at Colorado State University.
|
| 17 |
+
|
| 18 |
+
## Usage
|
| 19 |
+
|
| 20 |
+
Open the app, enter a SMILES string (or use the Draw button), select ¹H or ¹³C, and click Submit. Predicted shifts are Boltzmann-weighted averages over an MMFF94s conformer ensemble.
|
| 21 |
+
|
| 22 |
+
**Scope:** Neutral organic molecules containing C, H, N, O, S, P, F, Cl.
|
| 23 |
+
|
| 24 |
+
## Citation
|
| 25 |
+
|
| 26 |
+
Guan, Y.; Sowndarya, S. V. S.; Gallegos, L. C.; St. John, P. C.; Paton, R. S.
|
| 27 |
+
*Chem. Sci.* **2021**, *12*, 12012–12026.
|
| 28 |
+
[DOI: 10.1039/D1SC03343C](https://doi.org/10.1039/D1SC03343C)
|
| 29 |
+
|
| 30 |
+
## Source code
|
| 31 |
+
|
| 32 |
+
[github.com/patonlab/CASCADE](https://github.com/patonlab/CASCADE)
|
app.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CASCADE – Flask app for HF Spaces
|
| 3 |
+
Mounted at /cascade_v1/
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
import uuid
|
| 9 |
+
|
| 10 |
+
import redis
|
| 11 |
+
from flask import Flask, Blueprint, request, jsonify, render_template, send_file, abort, redirect, Response
|
| 12 |
+
|
| 13 |
+
from rdkit import Chem
|
| 14 |
+
from rdkit.Chem import AllChem
|
| 15 |
+
from rdkit.Chem.Draw import rdMolDraw2D
|
| 16 |
+
|
| 17 |
+
from NMR_Prediction.valid import validate_smiles
|
| 18 |
+
|
| 19 |
+
bp = Blueprint("cascade", __name__)
|
| 20 |
+
|
| 21 |
+
# Redis connection shared by all routes. The target defaults to the local
# instance (localhost:6379, db 0) and can be overridden through the
# REDIS_HOST / REDIS_PORT / REDIS_DB environment variables.
redis_client = redis.StrictRedis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB", "0")),
    decode_responses=True,
)
|
| 24 |
+
|
| 25 |
+
# ── Pages ─────────────────────────────────────────────────────────────────────
|
| 26 |
+
@bp.route("/")
@bp.route("")
@bp.route("/predict/")
@bp.route("/predict")
@bp.route("/home/")
@bp.route("/about/")
def predict():
    """Render the prediction page; every alias URL above serves the same template."""
    template_name = "cascade/predict.html"
    return render_template(template_name)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ── Job submission ────────────────────────────────────────────────────────────
|
| 37 |
+
def _submit_job(smiles, type_):
    """Validate ``smiles`` and enqueue a prediction task.

    On success the job details are stored in Redis under
    ``task_detail_<id>``, the id is pushed onto ``task_queue`` and the id
    (a hex string) is returned. For a rejected SMILES a JSON response with
    ``task_id`` set to null is returned instead, so callers distinguish the
    two outcomes by type.
    """
    if validate_smiles(smiles):
        task_id = uuid.uuid4().hex
        payload = json.dumps({"smiles": smiles, "type_": type_})
        redis_client.set(f"task_detail_{task_id}", payload)
        redis_client.rpush("task_queue", task_id)
        return task_id
    return jsonify({"message": "Input molecule is not allowed", "task_id": None})
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@bp.route("/predict_NMR_C/", methods=["POST"])
def predict_NMR_C():
    """Queue a job from the posted ``smiles`` and ``type_`` form fields (C endpoint).

    Returns the rejection response produced by ``_submit_job`` when the
    molecule is refused, otherwise a JSON acknowledgement carrying the task id.
    """
    form = request.form
    outcome = _submit_job(form["smiles"], form["type_"])
    if isinstance(outcome, str):
        return jsonify({"message": "Molecule submitted to C queue", "task_id": outcome})
    return outcome
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@bp.route("/predict_NMR_H/", methods=["POST"])
def predict_NMR_H():
    """Queue a job from the posted ``smiles`` and ``type_`` form fields (H endpoint).

    Returns the rejection response produced by ``_submit_job`` when the
    molecule is refused, otherwise a JSON acknowledgement carrying the task id.
    """
    form = request.form
    outcome = _submit_job(form["smiles"], form["type_"])
    if isinstance(outcome, str):
        return jsonify({"message": "Molecule submitted to H queue", "task_id": outcome})
    return outcome
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# ── check_task ────────────────────────────────────────────────────────────────
|
| 64 |
+
@bp.route("/check_task/")
def check_task():
    """Report the state of the task named by the ``task_id`` query argument.

    The plain-text body is ``running`` while no result is stored,
    ``Error1`` when the stored result carries ``errMessage``, and ``done``
    otherwise; the HTTP status is 200 in all three cases.
    """
    result_key = f"task_result_{request.args['task_id']}"
    raw = redis_client.get(result_key)
    if not raw:
        status = "running"
    elif "errMessage" in json.loads(raw):
        status = "Error1"
    else:
        status = "done"
    return status, 200
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# ── get_result ────────────────────────────────────────────────────────────────
|
| 76 |
+
@bp.route("/get_result/")
def get_result():
    """Return the stored prediction for ``task_id`` together with an annotated SVG.

    Responses:
      * 404 with ``{"error": "Result not found"}`` when nothing is stored;
      * 500 with the stored ``errMessage`` when the job failed, or when the
        stored SMILES cannot be parsed;
      * otherwise a JSON object with the SVG drawing (atoms labelled with
        their weighted shift), the SMILES, the nucleus and the stored shift,
        conformer and relative-energy fields.
    """
    task_id = request.args["task_id"]
    raw = redis_client.get(f"task_result_{task_id}")
    if not raw:
        return jsonify({"error": "Result not found"}), 404

    result = json.loads(raw)
    if "errMessage" in result:
        return jsonify({"error": result["errMessage"]}), 500

    smiles = result["smiles"]
    nucleus = result.get("type_", "C")  # "C" or "H"

    # weightedShiftTxt is a ';'-separated list of "<1-based atom index>,<shift>" items.
    weighted_shift_txt = result["weightedShiftTxt"]
    shift_map = {}
    for item in filter(None, weighted_shift_txt.split(";")):
        parts = item.split(",")
        if len(parts) != 2:
            continue
        try:
            shift_map[int(parts[0])] = parts[1]
        except ValueError:
            # A non-numeric index is skipped like any other malformed item.
            continue

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals failure by returning None; answer with an
        # error instead of crashing on the calls below.
        return jsonify({"error": "Stored SMILES could not be parsed"}), 500

    if nucleus == "H":
        # 1H: draw with explicit H so the H atoms are visible, on a larger canvas
        mol = Chem.AddHs(mol)
        canvas = (700, 500)
    else:
        # 13C: skeletal structure, no explicit H
        canvas = (600, 450)

    AllChem.Compute2DCoords(mol)
    mol_draw = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=True)
    n_label = mol.GetNumAtoms()
    drawer = rdMolDraw2D.MolDraw2DSVG(*canvas)

    opts = drawer.drawOptions()
    for atom_1idx, shift_val in shift_map.items():
        atom_0idx = atom_1idx - 1
        # Ignore indices outside the drawn molecule; this includes index 0,
        # which would otherwise map to label slot -1.
        if 0 <= atom_0idx < n_label:
            opts.atomLabels[atom_0idx] = shift_val
    opts.clearBackground = False
    opts.bondLineWidth = 1
    opts.padding = 0.15
    opts.additionalAtomLabelPadding = 0.1

    drawer.DrawMolecule(mol_draw)
    drawer.FinishDrawing()
    # Strip the "svg:" namespace prefixes from the generated markup.
    svg = drawer.GetDrawingText().replace("svg:", "").replace(":svg", "")

    return jsonify({
        "svg": svg,
        "smiles": smiles,
        "nucleus": nucleus,
        "conf_sdfs": result.get("conf_sdfs", []),
        "weightedShift": weighted_shift_txt,
        "confShift": result["confShiftTxt"],
        "relative_E": result["relative_E"],
        "taskId": task_id,
    })
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ── Download as CSV ───────────────────────────────────────────────────────────
|
| 140 |
+
@bp.route("/download/<task_id>/")
def download(task_id):
    """Serve the weighted shifts of ``task_id`` as a CSV attachment.

    Aborts with 404 when no result is stored or when the stored result
    carries ``errMessage``. The file has a header row followed by one
    ``index,shift`` row per well-formed item of ``weightedShiftTxt``.
    """
    raw = redis_client.get(f"task_result_{task_id}")
    if not raw:
        abort(404)
    result = json.loads(raw)
    if "errMessage" in result:
        abort(404)

    nucleus = result.get("type_", "C")
    label = "1H" if nucleus == "H" else "13C"
    rows = [f"Atom Index,Predicted {label} Shift (ppm)"]
    for entry in result["weightedShiftTxt"].split(";"):
        if not entry:
            continue
        fields = entry.split(",")
        # Only "<index>,<shift>" pairs are exported; anything else is dropped.
        if len(fields) == 2:
            rows.append(",".join(fields))

    disposition = f"attachment; filename=cascade_{task_id[:8]}.csv"
    return Response(
        "\n".join(rows),
        mimetype="text/csv",
        headers={"Content-Disposition": disposition},
    )
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def create_app():
    """Build the Flask application with the blueprint mounted at /cascade_v1.

    Both "/" and "/cascade_v1" redirect to the prediction page.
    """
    flask_app = Flask(__name__, static_folder="static", template_folder="templates")
    flask_app.register_blueprint(bp, url_prefix="/cascade_v1")

    def root():
        return redirect("/cascade_v1/predict/")

    # Same registration order as the stacked decorators: innermost rule first.
    for rule in ("/cascade_v1", "/"):
        flask_app.add_url_rule(rule, view_func=root)

    return flask_app
|
| 174 |
+
|
| 175 |
+
app = create_app()
|
| 176 |
+
|
| 177 |
+
if __name__ == "__main__":
|
| 178 |
+
app.run(host="0.0.0.0", port=7860, debug=False)
|
nfp/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .layers import *
|
| 2 |
+
from .models import *
|
| 3 |
+
|
| 4 |
+
custom_layers = {
|
| 5 |
+
'MessageLayer': MessageLayer,
|
| 6 |
+
'EdgeNetwork': EdgeNetwork,
|
| 7 |
+
'ReduceAtomToMol': ReduceAtomToMol,
|
| 8 |
+
'ReduceBondToAtom': ReduceBondToAtom,
|
| 9 |
+
'GatherAtomToBond': GatherAtomToBond,
|
| 10 |
+
'GRUStep': GRUStep,
|
| 11 |
+
'Embedding2D': Embedding2D,
|
| 12 |
+
'Squeeze': Squeeze,
|
| 13 |
+
'GraphModel': GraphModel,
|
| 14 |
+
'masked_mean_squared_error': masked_mean_squared_error
|
| 15 |
+
}
|
nfp/layers/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .layers import *
|
| 2 |
+
from .utils import *
|
| 3 |
+
from .wrappers import *
|
nfp/layers/layers.py
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.engine import Layer
|
| 2 |
+
|
| 3 |
+
from keras import activations
|
| 4 |
+
from keras import initializers
|
| 5 |
+
from keras import regularizers
|
| 6 |
+
from keras import constraints
|
| 7 |
+
from keras.layers import Lambda
|
| 8 |
+
|
| 9 |
+
import tensorflow as tf
|
| 10 |
+
import keras.backend as K
|
| 11 |
+
|
| 12 |
+
class MessageLayer(Layer):
    """ Matrix-multiplication message function (Gilmer 2017).

    Each bond contributes the product of its (d x d) bond matrix with the
    hidden state of the atom in column 1 of the connectivity matrix; the
    resulting messages are reduced over the atom in column 0.

    """

    def __init__(self, dropout=0., reducer=None, **kwargs):
        """

        dropout : float between 0 and 1
            Dropout rate applied to individual messages before they are
            reduced. Values outside the open interval (0, 1) disable it.

        reducer : ['sum', 'mean', 'max', or 'min']
            How incoming messages are collected for each atom; ``None``
            behaves like 'sum'. The sorted `segment_*` methods from
            tensorflow are used, which relies on the connectivity matrix
            being sorted by receiving atom.

        """

        self.dropout = dropout
        self.reducer = reducer

        segment_ops = {
            'sum': tf.segment_sum,
            'mean': tf.segment_mean,
            'max': tf.segment_max,
            'min': tf.segment_min,
        }
        segment_ops[None] = segment_ops['sum']

        self._reducer = segment_ops[reducer]

        super(MessageLayer, self).__init__(**kwargs)

    def call(self, inputs, training=None):
        """ Perform one message passing step and return the reduced messages
        for each receiving atom.

        Inputs are [atom_matrix, bond_matrix, connectivity_matrix]

        atom_matrix : (num_atoms_in_batch, d)
            Current hidden state of each atom.

        bond_matrix : (num_bonds_in_batch, d, d)
            Current edge features, one (d x d) matrix per edge.

        connectivity : (num_bonds_in_batch, 2)
            (a_i, a_j) pairs: the bond connects atom_matrix[a_j] to
            atom_matrix[a_i]; the first entry is the receiving atom.

        """

        atom_matrix, bond_matrix, connectivity = inputs

        # Line up one sender hidden state with every bond matrix
        sender_states = tf.gather(atom_matrix, connectivity[:, 1])

        # One message per bond: bond matrix times sender state
        messages = K.batch_dot(bond_matrix, sender_states)

        # Message-wise dropout, only when a usable rate was configured
        if 0. < self.dropout < 1.:
            training_messages = K.dropout(messages, self.dropout)
        else:
            training_messages = messages

        selected = K.in_train_phase(
            training_messages, messages, training=training)

        # Reduce along the (sorted) receiver indices
        return self._reducer(selected, connectivity[:, 0])

    def compute_output_shape(self, input_shape):
        """ The output has the shape of the first input, the atom hidden
        state matrix. """

        assert input_shape and len(input_shape) == 3
        assert input_shape[0][-1]  # atom hidden state dimension must be specified
        return input_shape[0]

    def get_config(self):
        own_config = {
            'dropout': self.dropout,
            'reducer': self.reducer,
        }
        base_config = super(MessageLayer, self).get_config()
        return dict(base_config, **own_config)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class GatherAtomToBond(Layer):
    """ Re-indexes the atom matrix (num_atoms_in_batch, d) into a per-bond
    matrix (num_bonds_in_batch, d) by picking, for every bond, one of the
    two atoms listed in the connectivity matrix.

    index : 0 or 1
        column of the connectivity matrix to gather: the sending atoms (1)
        or the receiving atoms (0) of each bond.

    """

    def __init__(self, index, **kwargs):
        self.index = index
        super(GatherAtomToBond, self).__init__(**kwargs)

    def call(self, inputs):
        atom_matrix, connectivity = inputs
        atom_ids = connectivity[:, self.index]
        return tf.gather(atom_matrix, atom_ids)

    def compute_output_shape(self, input_shape):
        """ Reports the shape of the atom matrix (first input). """

        assert input_shape and len(input_shape) == 2
        return input_shape[0]

    def get_config(self):
        own_config = {
            'index': self.index,
        }
        base_config = super(GatherAtomToBond, self).get_config()
        return dict(base_config, **own_config)
|
| 147 |
+
|
| 148 |
+
class Reducer(Layer):
    """ Base class for the reducing layers.

    reducer : ['sum', 'unsorted_sum', 'mean', 'unsorted_mean', 'max', or 'min']
        Name of the tensorflow segment operation used to collect elements
        for each atom or molecule; ``None`` behaves like 'sum'. The sorted
        `segment_*` variants rely on the segment ids being sorted.

    """

    def __init__(self, reducer=None, **kwargs):

        self.reducer = reducer

        segment_ops = {
            'sum': tf.segment_sum,
            'unsorted_sum': tf.unsorted_segment_sum,
            'mean': tf.segment_mean,
            'unsorted_mean': tf.unsorted_segment_mean,
            'max': tf.segment_max,
            'min': tf.segment_min,
        }
        segment_ops[None] = segment_ops['sum']

        self._reducer = segment_ops[reducer]

        super(Reducer, self).__init__(**kwargs)

    def get_config(self):
        base_config = super(Reducer, self).get_config()
        return dict(base_config, reducer=self.reducer)

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        # Output shape is (n_graphs, atom_dim)
        return input_shape[0]
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
class ReduceAtomToMol(Reducer):
    """ Reduce the atom hidden states of each molecule with the configured
    reducer.

    Inputs

    atom_matrix : (num_atoms_in_batch, d)
        hidden state of every atom in the batch

    node_graph_indices : (num_atoms_in_batch,)
        for every atom, the index of the molecule in the batch it belongs
        to, e.g. [0, 0, 0, 1, 1] for a 3 atom molecule followed by a 2 atom
        molecule.
    """

    def call(self, inputs):

        atom_matrix, node_graph_indices = inputs
        reduce_fn = self._reducer
        return reduce_fn(atom_matrix, node_graph_indices)
|
| 208 |
+
|
| 209 |
+
class ReduceBondToPro(Reducer):
    """
    Reduces bonds according to bond_index to obtain target bond properties.

    Inputs

    bond_matrix : (num_bonds_in_batch, d)
        hidden state of every bond in the batch

    bond_index : (bond_atoms_in_batch, )
        for every bond, the position in the target property it links to,
        e.g. [-1,-1,0,0,-1,-1,1....]

    n_pro :
        summed to obtain the number of segments handed to the reducer
    """

    def call(self, inputs):

        bond_matrix, bond_index, n_pro = inputs
        # The reducer is called with an explicit segment count here
        segment_count = tf.reduce_sum(n_pro)
        return self._reducer(bond_matrix, bond_index, segment_count)

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 3
        bond_shape = input_shape[0]
        return bond_shape
|
| 233 |
+
|
| 234 |
+
class ReduceAtomToPro(Reducer):
    """
    Reduces atoms according to atom_index to obtain target atom properties.

    Inputs

    atom_matrix : (num_atoms_in_batch, d)
        hidden state of every atom in the batch

    atom_index : (atom_atoms_in_batch, )
        for every atom, the position in the target property it links to,
        e.g. [-1,-1,0,0,-1,-1,1....]

    n_pro :
        summed to obtain the number of segments handed to the reducer
    """

    def call(self, inputs):

        atom_matrix, atom_index, n_pro = inputs
        # The reducer is called with an explicit segment count here
        segment_count = tf.reduce_sum(n_pro)
        return self._reducer(atom_matrix, atom_index, segment_count)

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 3
        atom_shape = input_shape[0]
        return atom_shape
|
| 258 |
+
|
| 259 |
+
class ReduceBondToAtom(Reducer):

    """ Reduces the incoming messages of every receiving atom.

    Inputs:

    bond_matrix : (num_bonds_in_batch, d)
        messages coming from each sender atom; one row per bond/edge.

    connectivity : (num_bonds_in_batch, 2)
        (a_i, a_j) pairs: the bond connects atom_matrix[a_j] to
        atom_matrix[a_i]; the first entry is the receiving atom.

    The reduction runs over the first connectivity column.

    """

    def call(self, inputs):

        bond_matrix, connectivity = inputs
        receiver_ids = connectivity[:, 0]
        return self._reducer(bond_matrix, receiver_ids)
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
class Squeeze(Layer):
    """Drop axis 1 of the input.

    Keras forces inputs to be a vector per entry, so e.g. node_graph_indices
    arrives with shape (num_atoms_in_batch, 1), while it is easier to work
    with a vector of shape (num_atoms_in_batch,).
    """

    def call(self, inputs):
        """Squeeze axis 1 out of ``inputs``."""
        squeezed = K.squeeze(inputs, 1)
        return squeezed

    def compute_output_shape(self, input_shape):
        """Report the input shape with its last entry removed."""
        trimmed_shape = input_shape[:-1]
        return trimmed_shape
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
class Embedding2D(Layer):
    """Embedding layer that stores a square matrix per class.

    Keras typically embeds items as a single vector, while the matrix
    multiplication method of Gilmer 2017 needs a matrix for each bond type.
    This is that fairly simple extension of the traditional embedding layer:
    the weight has shape (input_dim, output_dim, output_dim).
    """

    def __init__(self, input_dim, output_dim,
                 embeddings_initializer='uniform',
                 embeddings_regularizer=None,
                 embeddings_constraint=None,
                 **kwargs):
        """Store the dimensions and resolve the Keras initializer,
        regularizer and constraint identifiers."""
        super(Embedding2D, self).__init__(**kwargs)

        self.input_dim, self.output_dim = input_dim, output_dim

        self.embeddings_initializer = initializers.get(embeddings_initializer)
        self.embeddings_regularizer = regularizers.get(embeddings_regularizer)
        self.embeddings_constraint = constraints.get(embeddings_constraint)

    def build(self, input_shape):
        """Create the (input_dim, output_dim, output_dim) weight tensor."""
        weight_shape = (self.input_dim, self.output_dim, self.output_dim)
        self.embeddings = self.add_weight(
            shape=weight_shape,
            initializer=self.embeddings_initializer,
            name='bond_embedding_weights',
            regularizer=self.embeddings_regularizer,
            constraint=self.embeddings_constraint)

        self.built = True

    def call(self, inputs):
        """Look up one matrix per entry of ``inputs``."""
        return tf.nn.embedding_lookup(self.embeddings, inputs)

    def compute_output_shape(self, input_shape):
        """Report (input_shape[0], output_dim, output_dim)."""
        leading_dim = input_shape[0]
        return (leading_dim, self.output_dim, self.output_dim)

    def get_config(self):
        """Return the base layer config extended with this layer's
        arguments; this layer's entries win on key collisions."""
        own_config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'embeddings_initializer':
                initializers.serialize(self.embeddings_initializer),
            'embeddings_regularizer':
                regularizers.serialize(self.embeddings_regularizer),
            'embeddings_constraint':
                constraints.serialize(self.embeddings_constraint),
        }
        merged = dict(super(Embedding2D, self).get_config())
        merged.update(own_config)
        return merged
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
class EdgeNetwork(Layer):
    """ A layer to embed (bond_type, distance) pairs as a NxN matrix.

    Inputs:
        units : dimension of the output matrix
        bond_classes : number of unique bonds

    First performs a 1-hot encoding of the bond_type, then passes the
    (*one_hot_encoding, distance) vector to a dense layer. This is the "Edge
    Network" message described by Gilmer, 2017.

    The dense kernel has shape (bond_classes + 1, units**2); the result is
    reshaped to (-1, units, units).
    """

    def __init__(self, units, bond_classes,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """Store the layer sizes and resolve the Keras activation,
        initializer, regularizer and constraint identifiers."""

        super(EdgeNetwork, self).__init__(**kwargs)
        self.units = units
        self.bond_classes = bond_classes

        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

    def build(self, input_shape):
        """Create the kernel and, when ``use_bias`` is set, the bias."""

        # One input row per one-hot bond class, plus one for the distance.
        self.kernel = self.add_weight(shape=(self.bond_classes + 1, self.units**2),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units**2,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None

        self.built = True

    def call(self, inputs):
        """Map ``[bond_type, distance]`` to one (units, units) matrix per row.

        ``distance`` is concatenated to the one-hot bond type along axis 1,
        so it must be rank 2.
        """

        bond_type, distance = inputs

        # Flatten explicitly rather than calling tf.squeeze without an axis:
        # an axis-less squeeze also removes the batch dimension when the
        # batch holds a single bond (shape (1, 1) -> scalar), which breaks
        # the concat below, and it discards the static rank.
        bond_type_flat = tf.reshape(bond_type, [-1])
        bond_type_onehot = tf.one_hot(bond_type_flat, self.bond_classes)
        stacked_inputs = tf.concat([bond_type_onehot, distance], 1)

        output = K.dot(stacked_inputs, self.kernel)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)

        output = tf.reshape(output, [-1, self.units, self.units])
        return output

    def compute_output_shape(self, input_shape):
        """Report (leading dim of the first input, units, units)."""
        return (input_shape[0][0], self.units, self.units)

    def get_config(self):
        """Return the base layer config extended with this layer's
        arguments; this layer's entries win on key collisions."""
        config = {
            'units': self.units,
            'bond_classes': self.bond_classes,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(EdgeNetwork, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
nfp/layers/utils.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Unused for the moment, I believe. """
|
| 2 |
+
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
|
| 5 |
+
def get_shape(tensor):
    """Return the tensor's shape as a list.

    Each element is either:
    - an `int`, when the static shape value is available, or
    - an element of `tf.shape(tensor)`, when that dimension is dynamic.

    Args:
        tensor: A `tf.Tensor` to get the shape of.
    Returns:
        The `list` which contains the tensor's shape.
    """
    static_dims = tensor.shape.as_list()
    if None not in static_dims:
        # Fully static shape: no graph op is needed.
        return static_dims

    dynamic_dims = tf.shape(tensor)
    return [dynamic_dims[position] if dim is None else dim
            for position, dim in enumerate(static_dims)]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def repeat(tensor, repeats, axis=0):
    """Repeats a `tf.Tensor`'s elements along an axis by custom amounts.

    Equivalent to Numpy's `np.repeat`.
    `tensor` and `repeats` must have the same numbers of elements along
    `axis`.

    Args:
        tensor: A `tf.Tensor` to repeat.
        repeats: A 1D sequence of the number of repeats per element.
        axis: An axis to repeat along. Defaults to 0.
    Returns:
        The `tf.Tensor` with repeated values. Its static size along `axis`
        is set to unknown.
    """

    cumsum = tf.cumsum(repeats)
    range_ = tf.range(cumsum[-1])

    # For every output position, count how many cumulative boundaries it has
    # reached; that count is the index of the source element to gather.
    indicator_matrix = tf.cast(tf.expand_dims(range_, 1) >= cumsum, tf.int32)
    # `axis=` replaces the deprecated `reduction_indices=` alias.
    indices = tf.reduce_sum(indicator_matrix, axis=1)

    shifted_tensor = _axis_to_inside(tensor, axis)
    repeated_shifted_tensor = tf.gather(shifted_tensor, indices)
    repeated_tensor = _inside_to_axis(repeated_shifted_tensor, axis)

    shape = tensor.shape.as_list()
    shape[axis] = None
    repeated_tensor.set_shape(shape)

    return repeated_tensor
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _axis_to_inside(tensor, axis):
    """Move the given axis of a tensor to the front (position 0).

    The permutation applied is [axis, 0 .. axis-1, axis+1 .. rank-1].

    Args:
        tensor: A `tf.Tensor` to shift.
        axis: An `int` or `tf.Tensor` that indicates which axis to shift.
    Returns:
        The transposed tensor.
    """
    axis = tf.convert_to_tensor(axis)
    n_dims = tf.rank(tensor)

    before = tf.range(0, limit=axis)
    after = tf.range(tf.add(axis, 1), limit=n_dims)
    permutation = tf.concat([[axis], before, after], 0)

    return tf.transpose(tensor, perm=permutation)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def _inside_to_axis(tensor, axis):
    """Move the front axis (position 0) of a tensor to position `axis`.

    The permutation applied is [1 .. axis, 0, axis+1 .. rank-1].

    Args:
        tensor: A `tf.Tensor` to shift.
        axis: An `int` or `tf.Tensor` that indicates the destination axis.
    Returns:
        The transposed tensor.
    """
    axis = tf.convert_to_tensor(axis)
    n_dims = tf.rank(tensor)

    before = tf.range(1, limit=axis + 1)
    after = tf.range(tf.add(axis, 1), limit=n_dims)
    permutation = tf.concat([before, [0], after], 0)

    return tf.transpose(tensor, perm=permutation)
|
nfp/layers/wrappers.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
These classes wrap the GRUCell and LSTMCell keras layers, allowing weight-tying
|
| 3 |
+
between RNNs for each atom. Essentially, Keras expects inputs to an RNN to be
|
| 4 |
+
of the shape (batch_size, sequence_length, features), while our inputs are of
|
| 5 |
+
shape (num_atoms_per_batch, features).
|
| 6 |
+
|
| 7 |
+
Peter, 3/20/2018
|
| 8 |
+
|
| 9 |
+
I should revisit whether these are needed post graph_nets update.
|
| 10 |
+
|
| 11 |
+
Peter, 11/9/2018.
|
| 12 |
+
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from keras.layers import GRUCell, LSTMCell
|
| 16 |
+
|
| 17 |
+
class GRUStep(GRUCell):
    """A `GRUCell` that is called as a layer on ``[message, previous_state]``."""

    def build(self, input_shape):
        """Build the cell from the shape of the first input."""
        message_shape = input_shape[0]
        GRUCell.build(self, message_shape)

    def call(self, inputs):
        """ Inputs should be [message, previous_state], returns [next_state]
        """
        message = inputs[0]
        previous_state = inputs[1]
        step_result = GRUCell.call(self, message, [previous_state])
        return step_result[0]

    def compute_output_shape(self, input_shape):
        """Report the shape of the first input."""
        message_shape = input_shape[0]
        return message_shape
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class LSTMStep(LSTMCell):
    """An `LSTMCell` that is called as a layer on
    ``[message, previous_state, previous_memory]``."""

    def build(self, input_shape):
        """Build the cell from the shape of the first input."""
        message_shape = input_shape[0]
        LSTMCell.build(self, message_shape)

    def call(self, inputs):
        """ Inputs should be [message, previous_state, previous_memory],
        returns [next_state, next_memory]
        """
        message = inputs[0]
        previous = [inputs[1], inputs[2]]
        step_result = LSTMCell.call(self, message, previous)
        next_state = step_result[0]
        next_memory = step_result[1][1]
        return [next_state, next_memory]

    def compute_mask(self, inputs, mask):
        """Return no mask for either of the two outputs."""
        return [None, None]

    def compute_output_shape(self, input_shape):
        """Report the first input's shape for both outputs."""
        message_shape = input_shape[0]
        return [message_shape, message_shape]
|
nfp/models/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .models import *
|
| 2 |
+
from .losses import *
|
nfp/models/losses.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Keras loss functions will choke on NaN inputs, which we'll often have for
|
| 2 |
+
unspecified inputs. These are just a couple simple loss functions that mask NaN
|
| 3 |
+
values in both the test and predicted tensors when computing the cost. """
|
| 4 |
+
|
| 5 |
+
import keras.backend as K
|
| 6 |
+
import tensorflow as tf
|
| 7 |
+
|
| 8 |
+
def masked_mean_squared_error(y_true, y_pred):
    """Mean squared error over the entries where ``y_true`` is finite.

    The mask is computed from ``y_true`` only and applied to both tensors
    before the squared differences are averaged over the last axis.
    """
    finite_targets = tf.is_finite(y_true)
    targets = tf.boolean_mask(y_true, finite_targets)
    predictions = tf.boolean_mask(y_pred, finite_targets)
    squared_error = K.square(predictions - targets)
    return K.mean(squared_error, axis=-1)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def masked_mean_absolute_error(y_true, y_pred):
    """Mean absolute error over the entries where ``y_true`` is finite.

    The mask is computed from ``y_true`` only and applied to both tensors
    before the absolute differences are averaged over the last axis.
    """
    finite_targets = tf.is_finite(y_true)
    targets = tf.boolean_mask(y_true, finite_targets)
    predictions = tf.boolean_mask(y_pred, finite_targets)
    absolute_error = K.abs(predictions - targets)
    return K.mean(absolute_error, axis=-1)
|
nfp/models/models.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.models import Model
|
| 2 |
+
|
| 3 |
+
class GraphModel(Model):
    """ A simple modification of the Keras `Model` class that avoids
    checking each input for a consistent batch_size dimension. Should work
    as of keras-team/keras#11548.
    """

    def _standardize_user_data(self, *args, **kwargs):
        """Delegate to `Model`, always forcing ``check_array_lengths=False``."""
        kwargs.update(check_array_lengths=False)
        parent = super(GraphModel, self)
        return parent._standardize_user_data(*args, **kwargs)
|
nfp/preprocessing/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .preprocessor import *
|
| 2 |
+
from .features import *
|
| 3 |
+
from .scaling import *
|
| 4 |
+
from .sequence import *
|
nfp/preprocessing/features.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Tokenizer(object):
    """ Turns arbitrary (hashable) inputs into integer classes. """

    def __init__(self):
        # Class 1 is the default for an entry that is unseen at test time,
        # so newly learned classes start at 2.
        self._data = {'unk': 1}
        self.num_classes = 1
        self.train = True
        self.unknown = []

    def __call__(self, item):
        """ Return the integer class associated with `item`.

        A previously seen item returns its stored class. An unseen item is
        given a new class while `self.train` is true; otherwise it is
        recorded in `self.unknown` and the 'unk' class is returned.
        """
        try:
            return self._data[item]
        except KeyError:
            pass

        if not self.train:
            # Record the unknown item, then return the unknown label
            self.unknown.append(item)
            return self._data['unk']

        self._add_token(item)
        return self._data[item]

    def _add_token(self, item):
        """ Assign the next unused integer class to `item`. """
        next_class = self.num_classes + 1
        self.num_classes = next_class
        self._data[item] = next_class
|
| 34 |
+
|
| 35 |
+
# The rest of the methods in this module are specific functions for computing
|
| 36 |
+
# atom and bond features. New ones can be easily added though, and these are
|
| 37 |
+
# passed directly to the Preprocessor class.
|
| 38 |
+
|
| 39 |
+
def get_ring_size(obj, max_size=12):
    """Return the smallest ring size below `max_size` that `obj` is in.

    Returns 0 when `obj.IsInRing()` is false, and the string 'max' when
    `obj` is in a ring but `obj.IsInRingSize(i)` is false for every
    `i` in `range(max_size)`.
    """
    if not obj.IsInRing():
        return 0

    candidate_sizes = (size for size in range(max_size)
                       if obj.IsInRingSize(size))
    return next(candidate_sizes, 'max')
|
| 48 |
+
|
| 49 |
+
def atom_features(atom):
    """ Return the value of `atom.GetAtomicNum()` as the atom's feature. """
    atomic_number = atom.GetAtomicNum()
    return atomic_number
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def atom_features_v1(atom):
    """ Return a string key describing the atom type.

    The key is the `str()` of the tuple (symbol, degree, total number of
    Hs, implicit valence, is-aromatic), read from the atom's getters.
    """
    descriptor = (
        atom.GetSymbol(),
        atom.GetDegree(),
        atom.GetTotalNumHs(),
        atom.GetImplicitValence(),
        atom.GetIsAromatic(),
    )
    return str(descriptor)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def atom_features_v2(atom):
    """ Return a string key built from many atom getters plus ring size.

    Each getter named below is called on `atom`, the result of
    `get_ring_size(atom)` is appended, and the `str()` of the resulting
    tuple is returned.
    """
    getter_names = ('GetChiralTag', 'GetDegree', 'GetExplicitValence',
                    'GetFormalCharge', 'GetHybridization',
                    'GetImplicitValence', 'GetIsAromatic', 'GetNoImplicit',
                    'GetNumExplicitHs', 'GetNumImplicitHs',
                    'GetNumRadicalElectrons', 'GetSymbol', 'GetTotalDegree',
                    'GetTotalNumHs', 'GetTotalValence')

    values = [getattr(atom, name)() for name in getter_names]
    values.append(get_ring_size(atom))

    return str(tuple(values))
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def bond_features_v1(bond, **kwargs):
    """ Return a string key describing the bond type.

    The key is the `str()` of (bond type, is-conjugated, is-in-ring,
    sorted [begin symbol, end symbol]). Sorting the symbols makes the key
    independent of bond direction.

    Any keyword arguments (e.g. `flipped`, which is only meaningful for the
    'v3' version) are accepted and ignored.
    """
    bond_type = bond.GetBondType()
    conjugated = bond.GetIsConjugated()
    in_ring = bond.IsInRing()
    end_symbols = sorted([bond.GetBeginAtom().GetSymbol(),
                          bond.GetEndAtom().GetSymbol()])

    return str((bond_type, conjugated, in_ring, end_symbols))
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def bond_features_v2(bond, **kwargs):
    """ Return a string key describing the bond, including stereo and ring size.

    The key is the `str()` of (bond type, is-conjugated, stereo,
    `get_ring_size(bond)`, sorted [begin symbol, end symbol]). Keyword
    arguments are accepted and ignored.
    """
    bond_type = bond.GetBondType()
    conjugated = bond.GetIsConjugated()
    stereo = bond.GetStereo()
    ring_size = get_ring_size(bond)
    end_symbols = sorted([bond.GetBeginAtom().GetSymbol(),
                          bond.GetEndAtom().GetSymbol()])

    return str((bond_type, conjugated, stereo, ring_size, end_symbols))
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def bond_features_v3(bond, flipped=False):
    """ Return a direction-aware string key describing the bond.

    flipped : bool
        When true, the begin and end atoms swap roles for the
        `atom_features` entries of the key.

    The key is the `str()` of (bond type, is-conjugated, stereo,
    `get_ring_size(bond)`, end-atom symbol, start atom feature, end atom
    feature). Note that the symbol entry is always read from
    `bond.GetEndAtom()`, regardless of `flipped`.
    """
    first, second = bond.GetBeginAtom(), bond.GetEndAtom()
    if flipped:
        first, second = second, first

    start_atom = atom_features(first)
    end_atom = atom_features(second)

    descriptor = (
        bond.GetBondType(),
        bond.GetIsConjugated(),
        bond.GetStereo(),
        get_ring_size(bond),
        bond.GetEndAtom().GetSymbol(),
        start_atom,
        end_atom,
    )
    return str(descriptor)
|
nfp/preprocessing/preprocessor.py
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging, sys
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
from scipy.linalg import eigh
|
| 6 |
+
|
| 7 |
+
from rdkit import Chem
|
| 8 |
+
from rdkit.Chem import MolFromSmiles, MolToSmiles, AddHs
|
| 9 |
+
|
| 10 |
+
from . import features
|
| 11 |
+
from .features import Tokenizer
|
| 12 |
+
import time
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SmilesPreprocessor(object):
    """ Given a list of SMILES strings, encode these molecules as atom and
    connectivity feature matrices.

    Example:
    >>> preprocessor = SmilesPreprocessor(explicit_hs=False)
    >>> inputs = preprocessor.fit(data.smiles)
    """

    def __init__(self, explicit_hs=True, atom_features=None, bond_features=None):
        """

        explicit_hs : bool
            whether to tell RDkit to add H's to a molecule.
        atom_features : function
            A function applied to an rdkit.Atom that returns some
            representation (i.e., string, integer) for the Tokenizer class.
            Defaults to `features.atom_features`.
        bond_features : function
            A function applied to an rdkit Bond to return some description.
            It is called as `bond_features(bond, flipped=...)`. Defaults to
            `features.bond_features_v1`.

        """

        self.atom_tokenizer = Tokenizer()
        self.bond_tokenizer = Tokenizer()
        self.explicit_hs = explicit_hs

        if atom_features is None:
            atom_features = features.atom_features

        if bond_features is None:
            bond_features = features.bond_features_v1

        self.atom_features = atom_features
        self.bond_features = bond_features

    def fit(self, smiles_iterator):
        """ Fit an iterator of SMILES strings, creating new atom and bond
        tokens for unseen molecules. Returns a list with one feature
        dictionary per molecule (see `construct_feature_matrices`). """
        return list(self.preprocess(smiles_iterator, train=True))

    def predict(self, smiles_iterator):
        """ Uses previously determined atom and bond tokens to convert a SMILES
        iterator into a list of feature dictionaries. Unseen atom or bond
        descriptions map to the tokenizers' unknown class instead of creating
        new classes. """
        return list(self.preprocess(smiles_iterator, train=False))

    def preprocess(self, smiles_iterator, train=True):
        """ Lazily yield one feature dictionary per SMILES string.

        `train` is written to both tokenizers before iteration starts and
        controls whether they may create new classes.
        """

        self.atom_tokenizer.train = train
        self.bond_tokenizer.train = train

        for smiles in tqdm(smiles_iterator):
            yield self.construct_feature_matrices(smiles)

    @property
    def atom_classes(self):
        """ The number of atom types found (includes the 0 null-atom type) """
        return self.atom_tokenizer.num_classes + 1

    @property
    def bond_classes(self):
        """ The number of bond types found (includes the 0 null-bond type) """
        return self.bond_tokenizer.num_classes + 1

    def construct_feature_matrices(self, smiles):
        """ construct a molecule from the given smiles string and return atom
        and bond classes.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of directed bonds (two per bond; 1 for a molecule
            with no bonds, whose single row is left as zeros)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        """

        mol = MolFromSmiles(smiles)
        if self.explicit_hs:
            mol = AddHs(mol)

        n_atom = len(mol.GetAtoms())
        n_bond = 2 * len(mol.GetBonds())

        # If its an isolated atom, add a self-link
        if n_bond == 0:
            n_bond = 1

        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        bond_index = 0

        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        for n, atom in enumerate(atoms):

            # Atom Classes
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            start_index = atom.GetIdx()

            for bond in atom.GetBonds():
                # Is the bond pointing at the target atom
                rev = bond.GetBeginAtomIdx() != start_index

                # Bond Classes. Indexed by the running bond counter so the
                # class lines up with the same row of `connectivity`
                # (the atom counter `n` was used here previously, which
                # overwrote rows and left later ones at 0).
                bond_feature_matrix[bond_index] = self.bond_tokenizer(
                    self.bond_features(bond, flipped=rev))

                # Connectivity
                if not rev:  # Original direction
                    connectivity[bond_index, 0] = bond.GetBeginAtomIdx()
                    connectivity[bond_index, 1] = bond.GetEndAtomIdx()

                else:  # Reversed
                    connectivity[bond_index, 0] = bond.GetEndAtomIdx()
                    connectivity[bond_index, 1] = bond.GetBeginAtomIdx()

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'connectivity': connectivity,
        }
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class ConnectivityAPreprocessor(object):
    """ Given a list of SMILES strings, encode these molecules as atom,
    bond and connectivity feature matrices.

    Example:
    >>> preprocessor = ConnectivityAPreprocessor(explicit_hs=False)
    >>> inputs = preprocessor.fit(data.smiles)
    """

    def __init__(self, explicit_hs=True, atom_features=None, bond_features=None):
        """

        explicit_hs : bool
            whether to tell RDkit to add H's to a molecule.
        atom_features : function
            A function applied to an rdkit.Atom that returns some
            representation (i.e., string, integer) for the Tokenizer class.
            Defaults to features.atom_features_v1.
        bond_features : function
            A function applied to an rdkit Bond to return some description.
            It is called as bond_features(bond, flipped=bool).
            Defaults to features.bond_features_v1.

        """

        self.atom_tokenizer = Tokenizer()
        self.bond_tokenizer = Tokenizer()
        self.explicit_hs = explicit_hs

        if atom_features is None:
            atom_features = features.atom_features_v1

        if bond_features is None:
            bond_features = features.bond_features_v1

        self.atom_features = atom_features
        self.bond_features = bond_features

    def fit(self, smiles_iterator):
        """ Fit an iterator of SMILES strings, creating new atom and bond
        tokens for unseen molecules. Returns a list with one feature
        dictionary per molecule (see `construct_feature_matrices`). """
        return list(self.preprocess(smiles_iterator, train=True))

    def predict(self, smiles_iterator):
        """ Uses previously determined atom and bond tokens to convert a SMILES
        iterator into a list of feature dictionaries, with the tokenizers
        switched out of training mode. """
        return list(self.preprocess(smiles_iterator, train=False))

    def preprocess(self, smiles_iterator, train=True):
        """ Generator yielding one feature dictionary per SMILES string.

        The tokenizers' `train` flag is set when iteration starts, not when
        this method is called, because this is a generator. """

        self.atom_tokenizer.train = train
        self.bond_tokenizer.train = train

        for smiles in tqdm(smiles_iterator):
            yield self.construct_feature_matrices(smiles)

    @property
    def atom_classes(self):
        """ The number of atom types found (includes the 0 null-atom type) """
        return self.atom_tokenizer.num_classes + 1

    @property
    def bond_classes(self):
        """ The number of bond types found (includes the 0 null-bond type) """
        return self.bond_tokenizer.num_classes + 1

    def construct_feature_matrices(self, smiles):
        """ construct a molecule from the given smiles string and return atom
        and bond classes.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of directed bonds (2 per bond, or 1 self-link when
                   the molecule has no bonds at all)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        Raises
        RuntimeError if the SMILES string cannot be parsed.

        """

        mol = MolFromSmiles(smiles)
        if mol is None:
            # Fail with a clear message instead of an opaque error on None
            # further down; same error type/message as the Mol* preprocessors.
            raise RuntimeError("Issue in loading mol")

        if self.explicit_hs:
            mol = AddHs(mol)

        n_atom = len(mol.GetAtoms())
        n_bond = 2 * len(mol.GetBonds())

        # If its an isolated atom, add a self-link
        if n_bond == 0:
            n_bond = 1

        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        bond_index = 0

        for n, atom in enumerate(mol.GetAtoms()):

            # Atom Classes
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            start_index = atom.GetIdx()

            for bond in atom.GetBonds():
                # Is the bond pointing at the target atom
                rev = bond.GetBeginAtomIdx() != start_index

                # Bond Classes. Indexed by the running edge counter so that
                # row i of 'bond' describes row i of 'connectivity'; indexing
                # by the atom counter overwrote entries and could run past
                # the end of the (n_bond,) array.
                bond_feature_matrix[bond_index] = self.bond_tokenizer(
                    self.bond_features(bond, flipped=rev))

                # Connectivity: always stored as (current atom, other atom)
                if not rev:  # Original direction
                    connectivity[bond_index, 0] = bond.GetBeginAtomIdx()
                    connectivity[bond_index, 1] = bond.GetEndAtomIdx()

                else:  # Reversed
                    connectivity[bond_index, 0] = bond.GetEndAtomIdx()
                    connectivity[bond_index, 1] = bond.GetBeginAtomIdx()

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'connectivity': connectivity,
        }
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
class MolPreprocessor(SmilesPreprocessor):
    """ Preprocessor for 3D structures.

    Only `construct_feature_matrices` is redefined here; everything else is
    inherited from SmilesPreprocessor. (A shared base class with separate
    SMILES / mol subclasses would be cleaner and is still a to-do.)

    We pass an iterator of mol objects instead of SMILES strings this time.

    """

    def __init__(self, n_neighbors, cutoff, **kwargs):
        """ A preprocessor class that also returns distances between
        neighboring atoms. Adds edges for non-bonded atoms to include a maximum
        of n_neighbors around each atom

        n_neighbors : int
            maximum number of neighbors kept per atom
        cutoff : float
            only atoms closer than this distance (same units as
            Chem.Get3DDistanceMatrix) are considered neighbors
        **kwargs
            forwarded unchanged to SmilesPreprocessor.__init__
        """

        self.n_neighbors = n_neighbors
        self.cutoff = cutoff
        super(MolPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, mol):
        """ Given an rdkit mol, return atom feature matrices, bond feature
        matrices, and connectivity matrices.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of edges (likely n_atom * n_neighbors)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes. 0 for no bond
        'distance' : (n_bond,) list of bond distances
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        Rows that are never filled (fewer edges found than n_bond) keep their
        zero initial values.

        Raises
        RuntimeError if mol is None.

        """

        # Hopefully we've filtered out all problem mols by now. This check has
        # to come before the first use of `mol`, otherwise it can never fire.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected
            within_cutoff = (distance_matrix < self.cutoff) & (distance_matrix != 0)
            n_bond = distance_matrix[within_cutoff].size
            # NOTE(review): an atom with no neighbor inside the cutoff still
            # gets one self-link below, which this count does not include;
            # looks like that can overflow the arrays - confirm.

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. The +1 accounts for the atom itself, which sorts first.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(mol.GetAtoms()):

            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Nearest neighbors first; position 0 of the sort is skipped.
            neighbor_inds = distance_atom.argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor found: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))

                bond_distance_matrix[bond_index] = distance_matrix[n, neighbor]

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
        }
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
class MolBPreprocessor(MolPreprocessor):
    """
    This is a subclass of MolPreprocessor that preprocesses molecules with
    a bond property target
    """
    def __init__(self, **kwargs):
        """
        A preprocessor class that also returns a bond index matrix, besides the
        bond matrix returned by MolPreprocessor. The bond index matrix is then
        used as a reference to reduce the molecule to bond properties.

        **kwargs are forwarded unchanged to MolPreprocessor.__init__.
        """
        super(MolBPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, entry):
        """
        Given an entry containing an rdkit molecule and the array of bond
        indices for the target property, return atom feature matrices, bond
        feature matrices, distance matrices, connectivity matrices and bond
        reference matrices.

        entry : (mol, bond_index_array) pair

        returns
        dict with entries
        see MolPreprocessor
        'n_pro' : len(bond_index_array)
        'bond_index' : (n_bond,) array; for each edge backed by a bond, the
                       position of that bond in bond_index_array, -1 otherwise

        Raises
        RuntimeError if mol is None.
        """
        mol, bond_index_array = entry

        # Hopefully we've filtered out all problem mols by now. This check has
        # to come before the first use of `mol`, otherwise it can never fire.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())
        n_pro = len(bond_index_array)

        # Build the lookup list once. np.asarray lets plain sequences work
        # too; previously they silently produced an all -1 result.
        target_bonds = np.asarray(bond_index_array).tolist()

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected
            within_cutoff = (distance_matrix < self.cutoff) & (distance_matrix != 0)
            n_bond = distance_matrix[within_cutoff].size

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        bond_index_matrix = np.full(n_bond, -1, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. The +1 accounts for the atom itself, which sorts first.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(mol.GetAtoms()):
            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Nearest neighbors first; position 0 of the sort is skipped.
            neighbor_inds = distance_atom.argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor found: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))
                    try:
                        bond_index_matrix[bond_index] = target_bonds.index(
                            bond.GetIdx())
                    except ValueError:
                        # This bond is not one of the targets; keep -1.
                        pass

                bond_distance_matrix[bond_index] = distance_matrix[n, neighbor]

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'n_pro': n_pro,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
            'bond_index': bond_index_matrix,
        }
|
| 536 |
+
|
| 537 |
+
class MolAPreprocessor(MolPreprocessor):
    """
    This is a subclass of MolPreprocessor that preprocesses molecules with
    an atom property target
    """
    def __init__(self, **kwargs):
        """
        A preprocessor class that also returns an atom index matrix, besides
        the matrices returned by MolPreprocessor. The atom index matrix is then
        used as a reference to reduce the molecule to atom properties.

        **kwargs are forwarded unchanged to MolPreprocessor.__init__.
        """
        super(MolAPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, entry):
        """
        Given an entry containing an rdkit molecule and the array of atom
        indices for the target property, return atom feature matrices, bond
        feature matrices, distance matrices, connectivity matrices and atom
        reference matrices.

        entry : (mol, atom_index_array) pair

        returns
        dict with entries
        see MolPreprocessor
        'n_pro' : len(atom_index_array)
        'atom_index' : (n_atom,) array; for each atom, its position in
                       atom_index_array, -1 if it is not listed there

        Raises
        RuntimeError if mol is None.
        """
        mol, atom_index_array = entry

        # Hopefully we've filtered out all problem mols by now. This check has
        # to come before the first use of `mol`, otherwise it can never fire.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())
        n_pro = len(atom_index_array)

        # Build the lookup list once. np.asarray lets plain sequences work
        # too; previously they silently produced an all -1 result.
        target_atoms = np.asarray(atom_index_array).tolist()

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected
            within_cutoff = (distance_matrix < self.cutoff) & (distance_matrix != 0)
            n_bond = distance_matrix[within_cutoff].size

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        atom_index_matrix = np.full(n_atom, -1, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. The +1 accounts for the atom itself, which sorts first.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(mol.GetAtoms()):
            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))
            try:
                atom_index_matrix[n] = target_atoms.index(atom.GetIdx())
            except ValueError:
                # This atom is not one of the targets; keep -1.
                pass

            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Nearest neighbors first; position 0 of the sort is skipped.
            neighbor_inds = distance_atom.argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor found: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))

                bond_distance_matrix[bond_index] = distance_matrix[n, neighbor]

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'n_pro': n_pro,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
            'atom_index': atom_index_matrix,
        }
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
# TODO: rewrite this
|
| 655 |
+
# class LaplacianSmilesPreprocessor(SmilesPreprocessor):
|
| 656 |
+
# """ Extends the SmilesPreprocessor class to also return eigenvalues and
|
| 657 |
+
# eigenvectors of the graph laplacian matrix.
|
| 658 |
+
#
|
| 659 |
+
# Example:
|
| 660 |
+
# >>> preprocessor = SmilesPreprocessor(
|
| 661 |
+
# >>> max_atoms=55, max_bonds=62, max_degree=4, explicit_hs=False)
|
| 662 |
+
# >>> atom, connectivity, eigenvalues, eigenvectors = preprocessor.fit(
|
| 663 |
+
# data.smiles)
|
| 664 |
+
# """
|
| 665 |
+
#
|
| 666 |
+
# def preprocess(self, smiles_iterator, train=True):
|
| 667 |
+
#
|
| 668 |
+
# self.atom_tokenizer.train = train
|
| 669 |
+
# self.bond_tokenizer.train = train
|
| 670 |
+
#
|
| 671 |
+
# for smiles in tqdm(smiles_iterator):
|
| 672 |
+
# G = self._mol_to_nx(smiles)
|
| 673 |
+
# A = self._get_atom_feature_matrix(G)
|
| 674 |
+
# C = self._get_connectivity_matrix(G)
|
| 675 |
+
# W, V = self._get_laplacian_spectral_decomp(G)
|
| 676 |
+
# yield A, C, W, V
|
| 677 |
+
#
|
| 678 |
+
#
|
| 679 |
+
# def _get_laplacian_spectral_decomp(self, G):
|
| 680 |
+
# """ Return the eigenvalues and eigenvectors of the graph G, padded to
|
| 681 |
+
# `self.max_atoms`.
|
| 682 |
+
# """
|
| 683 |
+
#
|
| 684 |
+
# w0 = np.zeros((self.max_atoms, 1))
|
| 685 |
+
# v0 = np.zeros((self.max_atoms, self.max_atoms))
|
| 686 |
+
#
|
| 687 |
+
# w, v = eigh(nx.laplacian_matrix(G).todense())
|
| 688 |
+
#
|
| 689 |
+
# num_atoms = len(v)
|
| 690 |
+
#
|
| 691 |
+
# w0[:num_atoms, 0] = w
|
| 692 |
+
# v0[:num_atoms, :num_atoms] = v
|
| 693 |
+
#
|
| 694 |
+
# return w0, v0
|
| 695 |
+
#
|
| 696 |
+
#
|
| 697 |
+
# def fit(self, smiles_iterator):
|
| 698 |
+
# results = self._fit(smiles_iterator)
|
| 699 |
+
# return {'atom': results[0],
|
| 700 |
+
# 'connectivity': results[1],
|
| 701 |
+
# 'w': results[2],
|
| 702 |
+
# 'v': results[3]}
|
| 703 |
+
#
|
| 704 |
+
#
|
| 705 |
+
# def predict(self, smiles_iterator):
|
| 706 |
+
# results = self._predict(smiles_iterator)
|
| 707 |
+
# return {'atom': results[0],
|
| 708 |
+
# 'connectivity': results[1],
|
| 709 |
+
# 'w': results[2],
|
| 710 |
+
# 'v': results[3]}
|
| 711 |
+
|
| 712 |
+
|
| 713 |
+
def get_max_atom_bond_size(smiles_iterator, explicit_hs=True):
    """ Convenience function: scan a set of input SMILES and report the
    largest atom count and the largest directed-bond count (twice the number
    of bonds) as a dict with keys 'max_atoms' and 'max_bonds'. """

    largest_atom_count = 0
    largest_bond_count = 0

    for smiles in tqdm(smiles_iterator):
        mol = MolFromSmiles(smiles)
        if explicit_hs:
            mol = AddHs(mol)

        largest_atom_count = max(largest_atom_count, len(mol.GetAtoms()))
        largest_bond_count = max(largest_bond_count, len(mol.GetBonds()))

    # Each bond is counted once per direction.
    return {
        'max_atoms': largest_atom_count,
        'max_bonds': largest_bond_count * 2,
    }
|
| 727 |
+
|
| 728 |
+
|
| 729 |
+
def canonicalize_smiles(smiles, isomeric=True, sanitize=True):
    """ Round-trip a SMILES string through MolFromSmiles / MolToSmiles.

    Best-effort: any exception raised along the way is swallowed and None is
    returned instead. """
    try:
        parsed = MolFromSmiles(smiles, sanitize=sanitize)
        canonical = MolToSmiles(parsed, isomericSmiles=isomeric)
    except Exception:
        return None
    return canonical
|
nfp/preprocessing/scaling.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from sklearn.preprocessing import RobustScaler
|
| 4 |
+
from scipy import sparse
|
| 5 |
+
from sklearn.utils import check_array
|
| 6 |
+
from sklearn.utils.validation import FLOAT_DTYPES
|
| 7 |
+
from sklearn.preprocessing.data import _handle_zeros_in_scale
|
| 8 |
+
|
| 9 |
+
class RobustNanScaler(RobustScaler):
    """RobustScaler whose statistics are computed with NaN-aware functions.

    Input validation lets non-finite values through, and `fit` uses
    np.nanmedian / np.nanpercentile so that NaN entries are ignored when
    computing the center and scale.
    """

    def _check_array(self, X, copy):
        """Makes sure centering is not enabled for sparse matrices.

        NOTE(review): the `copy` argument is not used; `self.copy` is passed
        to check_array instead - confirm this is intended.
        """
        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
                        estimator=self, dtype=FLOAT_DTYPES, force_all_finite=False)

        if sparse.issparse(X):
            if self.with_centering:
                raise ValueError(
                    "Cannot center sparse matrices: use `with_centering=False`"
                    " instead. See docstring for motivation and alternatives.")
        return X

    def fit(self, X, y=None):
        """Compute the NaN-ignoring median and quantile range of X per column.

        Sets `center_` and `scale_`; each is None when the corresponding
        `with_centering` / `with_scaling` option is disabled. `y` is ignored.

        Raises TypeError for sparse input and ValueError for an invalid
        quantile range. Returns self.
        """
        if sparse.issparse(X):
            raise TypeError("RobustScaler cannot be fitted on sparse inputs")
        X = self._check_array(X, self.copy)

        if self.with_centering:
            self.center_ = np.nanmedian(X, axis=0)
        else:
            # Always define the attribute so a refit never leaves a stale
            # value behind and fitted-checks find it.
            self.center_ = None

        if self.with_scaling:
            q_min, q_max = self.quantile_range
            if not 0 <= q_min <= q_max <= 100:
                raise ValueError("Invalid quantile range: %s" %
                                 str(self.quantile_range))

            q = np.nanpercentile(X, self.quantile_range, axis=0)
            self.scale_ = (q[1] - q[0])
            self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
        else:
            self.scale_ = None

        return self
|
nfp/preprocessing/sequence.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from random import shuffle
|
| 3 |
+
|
| 4 |
+
from keras.utils import Sequence
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class GraphSequence(Sequence):
    """ keras Sequence that stitches per-molecule feature dictionaries
    together into batches. """

    def __init__(self, inputs, y=None, batch_size=1, shuffle=True,
                 final_batch=True):
        """ A keras.Sequence generator to be passed to model.fit_generator. (or
        any other *_generator method.) Returns (inputs, y) tuples where
        molecule feature matrices have been stitched together. Offsets the
        connectivity matrices such that atoms are indexed appropriately.

        inputs : sequence of feature dictionaries, one per molecule
        y : optional targets, one entry per molecule
        batch_size: number of molecules per batch
        shuffle : whether to shuffle the input data
        final_batch : whether to include the final, incomplete batch

        """
        self._inputs = inputs
        self._y = None if y is None else np.asarray(y)
        # The keys of the first molecule define the keys of every batch.
        self._input_keys = list(inputs[0].keys())
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.final_batch = final_batch

    def __len__(self):
        """ Total number of batches """
        n_batches = len(self._inputs) / float(self.batch_size)
        rounding = np.ceil if self.final_batch else np.floor
        return int(rounding(n_batches))

    def on_epoch_end(self):
        """ Reorder inputs (and targets) randomly when shuffling is on. """
        if not self.shuffle:
            return

        order = np.arange(0, len(self._inputs))
        np.random.shuffle(order)
        self._inputs = [self._inputs[i] for i in order]
        if self._y is not None:
            self._y = self._y[order]

    def __getitem__(self, idx):
        """ Calculate the feature matrices for a whole batch (with index `idx` <
        self.__len__). This involves adding offsets to the indices for each
        atom in the connectivity matrix; such that atoms and bonds in later
        molecules still refer to the correct atoms.

        """
        members = idx * self.batch_size + np.arange(0, self.batch_size)
        # Drop positions past the end of the data (incomplete final batch).
        members = members[members < len(self._inputs)]

        batch = {}
        for key in self._input_keys:
            batch[key] = self._concat(
                [self._inputs[i][key] for i in members])

        # Offset the connectivity matrix to account for the multiple graphs per
        # batch
        shift = _compute_stacked_offsets(batch['n_atom'], batch['n_bond'])
        batch['connectivity'] += shift[:, np.newaxis]

        # Compute graph indices with shape (n_atom,) that indicate to which
        # molecule each atom belongs.
        batch['node_graph_indices'] = np.repeat(
            np.arange(len(members)), batch['n_atom'])

        batch = self.process_data(batch)

        # Keras takes two options: (x, y) pairs for training, or just x when
        # doing predictions. Without a y matrix only the x data is returned.
        if self._y is None:
            return batch

        targets = np.concatenate(self._y[members]).reshape(-1, 1)
        return (batch, targets)

    def process_data(self, batch_data):
        """ function to add additional processing to batch data before returning """

        # The size entries aren't used currently, so they are removed here.
        # But we might need them at a later time.
        for unused_key in ('n_atom', 'n_bond'):
            del batch_data[unused_key]

        return batch_data

    def _concat(self, to_stack):
        """ function to stack (or concatenate) depending on dimensions """

        is_matrix = np.asarray(to_stack[0]).ndim >= 2
        joiner = np.concatenate if is_matrix else np.hstack
        return joiner(to_stack)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _compute_stacked_offsets(sizes, repeats):
    """ Computes offsets to add to indices of stacked np arrays.

    When several index arrays are stacked, the indices of every array after
    the first must be shifted by the total size of the arrays before it.

    Args:
        sizes: 1D sequence with the size of each graph.
        repeats: 1D sequence with the number of times each graph's offset is
            repeated.
    Returns:
        1D array holding each graph's offset, repeated `repeats` times.
    """
    # Offset of a graph = combined size of all graphs before it (0 for the
    # first graph), hence the leading 0 and the dropped last size.
    preceding_sizes = np.hstack([0, sizes[:-1]])
    graph_starts = np.cumsum(preceding_sizes)
    return np.repeat(graph_starts, repeats)
|
nfp/preprocessing/test.py
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging, sys
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
from scipy.linalg import eigh
|
| 6 |
+
|
| 7 |
+
from rdkit import Chem
|
| 8 |
+
from rdkit.Chem import MolFromSmiles, MolToSmiles, AddHs
|
| 9 |
+
|
| 10 |
+
from nfp.preprocessing import features
|
| 11 |
+
from nfp.preprocessing.features import Tokenizer
|
| 12 |
+
import time
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SmilesPreprocessor(object):
    """ Given a list of SMILES strings, encode these molecules as atom and
    connectivity feature matrices.

    Example:
    >>> preprocessor = SmilesPreprocessor(explicit_hs=False)
    >>> inputs = preprocessor.fit(data.smiles)
    """

    def __init__(self, explicit_hs=True, atom_features=None, bond_features=None):
        """

        explicit_hs : bool
            whether to tell RDkit to add H's to a molecule.
        atom_features : function
            A function applied to an rdkit.Atom that returns some
            representation (i.e., string, integer) for the Tokenizer class.
            Defaults to `features.atom_features_v1`.
        bond_features : function
            A function applied to an rdkit Bond to return some description.
            It is called as `bond_features(bond, flipped=<bool>)`.
            Defaults to `features.bond_features_v1`.

        """

        self.atom_tokenizer = Tokenizer()
        self.bond_tokenizer = Tokenizer()
        self.explicit_hs = explicit_hs

        if atom_features is None:
            atom_features = features.atom_features_v1

        if bond_features is None:
            bond_features = features.bond_features_v1

        self.atom_features = atom_features
        self.bond_features = bond_features

    def fit(self, smiles_iterator):
        """ Fit an iterator of SMILES strings with both tokenizers in
        training mode (presumably creating new atom and bond tokens for
        unseen classes -- see Tokenizer). Returns a list with one feature
        dictionary per input, as produced by `construct_feature_matrices`. """
        return list(self.preprocess(smiles_iterator, train=True))

    def predict(self, smiles_iterator):
        """ Convert a SMILES iterator into a list of feature dictionaries
        with both tokenizers switched out of training mode, so that atom and
        bond classes stay consistent with previously determined results. """
        return list(self.preprocess(smiles_iterator, train=False))

    def preprocess(self, smiles_iterator, train=True):
        """ Generator yielding one feature dictionary per input item.

        The `train` flag is copied onto both tokenizers before iterating;
        progress is reported through tqdm. """

        self.atom_tokenizer.train = train
        self.bond_tokenizer.train = train

        for smiles in tqdm(smiles_iterator):
            yield self.construct_feature_matrices(smiles)

    @property
    def atom_classes(self):
        """ The number of atom types found (includes the 0 null-atom type) """
        return self.atom_tokenizer.num_classes + 1

    @property
    def bond_classes(self):
        """ The number of bond types found (includes the 0 null-bond type) """
        return self.bond_tokenizer.num_classes + 1

    def construct_feature_matrices(self, smiles):
        """ construct a molecule from the given smiles string and return atom
        and bond classes.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of directed edges (twice the number of bonds, or 1
            for a molecule without bonds)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        """

        mol = MolFromSmiles(smiles)
        if self.explicit_hs:
            mol = AddHs(mol)

        n_atom = len(mol.GetAtoms())
        # Every bond is stored twice, once per direction.
        n_bond = 2 * len(mol.GetBonds())

        # If its an isolated atom, add a self-link (the zero-initialised row
        # of `connectivity` then reads 0 -> 0 with bond class 0).
        if n_bond == 0:
            n_bond = 1

        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        bond_index = 0

        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        for n, atom in enumerate(atoms):

            # Atom Classes
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            start_index = atom.GetIdx()

            for bond in atom.GetBonds():
                # Is the bond pointing at the target atom
                rev = bond.GetBeginAtomIdx() != start_index

                # Bond Classes. Indexed by the running edge counter so that
                # row i of 'bond' describes row i of 'connectivity'; indexing
                # by the atom counter `n` overwrote entries and misaligned
                # the two arrays.
                bond_feature_matrix[bond_index] = self.bond_tokenizer(
                    self.bond_features(bond, flipped=rev))

                # Connectivity
                if not rev:  # Original direction
                    connectivity[bond_index, 0] = bond.GetBeginAtomIdx()
                    connectivity[bond_index, 1] = bond.GetEndAtomIdx()

                else:  # Reversed
                    connectivity[bond_index, 0] = bond.GetEndAtomIdx()
                    connectivity[bond_index, 1] = bond.GetBeginAtomIdx()

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'connectivity': connectivity,
        }
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class ConnectivityAPreprocessor(object):
    """ Given a list of SMILES strings, encode these molecules as atom and
    connectivity feature matrices.

    Example:
    >>> preprocessor = ConnectivityAPreprocessor(explicit_hs=False)
    >>> inputs = preprocessor.fit(data.smiles)
    """

    def __init__(self, explicit_hs=True, atom_features=None, bond_features=None):
        """

        explicit_hs : bool
            whether to tell RDkit to add H's to a molecule.
        atom_features : function
            A function applied to an rdkit.Atom that returns some
            representation (i.e., string, integer) for the Tokenizer class.
            Defaults to `features.atom_features_v1`.
        bond_features : function
            A function applied to an rdkit Bond to return some description.
            It is called as `bond_features(bond, flipped=<bool>)`.
            Defaults to `features.bond_features_v1`.

        """

        self.atom_tokenizer = Tokenizer()
        self.bond_tokenizer = Tokenizer()
        self.explicit_hs = explicit_hs

        if atom_features is None:
            atom_features = features.atom_features_v1

        if bond_features is None:
            bond_features = features.bond_features_v1

        self.atom_features = atom_features
        self.bond_features = bond_features

    def fit(self, smiles_iterator):
        """ Fit an iterator of SMILES strings with both tokenizers in
        training mode (presumably creating new atom and bond tokens for
        unseen classes -- see Tokenizer). Returns a list with one feature
        dictionary per input, as produced by `construct_feature_matrices`. """
        return list(self.preprocess(smiles_iterator, train=True))

    def predict(self, smiles_iterator):
        """ Convert a SMILES iterator into a list of feature dictionaries
        with both tokenizers switched out of training mode, so that atom and
        bond classes stay consistent with previously determined results. """
        return list(self.preprocess(smiles_iterator, train=False))

    def preprocess(self, smiles_iterator, train=True):
        """ Generator yielding one feature dictionary per input item.

        The `train` flag is copied onto both tokenizers before iterating;
        progress is reported through tqdm. """

        self.atom_tokenizer.train = train
        self.bond_tokenizer.train = train

        for smiles in tqdm(smiles_iterator):
            yield self.construct_feature_matrices(smiles)

    @property
    def atom_classes(self):
        """ The number of atom types found (includes the 0 null-atom type) """
        return self.atom_tokenizer.num_classes + 1

    @property
    def bond_classes(self):
        """ The number of bond types found (includes the 0 null-bond type) """
        return self.bond_tokenizer.num_classes + 1

    def construct_feature_matrices(self, smiles):
        """ construct a molecule from the given smiles string and return atom
        and bond classes.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of directed edges (twice the number of bonds, or 1
            for a molecule without bonds)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        """

        mol = MolFromSmiles(smiles)
        if self.explicit_hs:
            mol = AddHs(mol)

        n_atom = len(mol.GetAtoms())
        # Every bond is stored twice, once per direction.
        n_bond = 2 * len(mol.GetBonds())

        # If its an isolated atom, add a self-link (the zero-initialised row
        # of `connectivity` then reads 0 -> 0 with bond class 0).
        if n_bond == 0:
            n_bond = 1

        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        bond_index = 0

        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        for n, atom in enumerate(atoms):

            # Atom Classes
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            start_index = atom.GetIdx()

            for bond in atom.GetBonds():
                # Is the bond pointing at the target atom
                rev = bond.GetBeginAtomIdx() != start_index

                # Bond Classes. Indexed by the running edge counter so that
                # row i of 'bond' describes row i of 'connectivity'; indexing
                # by the atom counter `n` overwrote entries and misaligned
                # the two arrays.
                bond_feature_matrix[bond_index] = self.bond_tokenizer(
                    self.bond_features(bond, flipped=rev))

                # Connectivity
                if not rev:  # Original direction
                    connectivity[bond_index, 0] = bond.GetBeginAtomIdx()
                    connectivity[bond_index, 1] = bond.GetEndAtomIdx()

                else:  # Reversed
                    connectivity[bond_index, 0] = bond.GetEndAtomIdx()
                    connectivity[bond_index, 1] = bond.GetBeginAtomIdx()

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'connectivity': connectivity,
        }
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class MolPreprocessor(SmilesPreprocessor):
    """ I should refactor this into a base class and separate
    SmilesPreprocessor classes. But the idea is that we only need to redefine
    the `construct_feature_matrices` method to have a working preprocessor that
    handles 3D structures.

    We'll pass an iterator of mol objects instead of SMILES strings this time,
    though.

    """

    def __init__(self, n_neighbors, cutoff, **kwargs):
        """ A preprocessor class that also returns distances between
        neighboring atoms. Adds edges for non-bonded atoms to include a maximum
        of n_neighbors around each atom

        n_neighbors : int
            maximum number of outgoing edges created per atom.
        cutoff : float
            only atoms closer than this distance (same units as the 3D
            distance matrix of the mol) are considered neighbors.
        **kwargs
            forwarded unchanged to SmilesPreprocessor.__init__.
        """

        self.n_neighbors = n_neighbors
        self.cutoff = cutoff
        super(MolPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, mol):
        """ Given an rdkit mol, return atom feature matrices, bond feature
        matrices, and connectivity matrices.

        Returns
        dict with entries
        'n_atom' : number of atoms in the molecule
        'n_bond' : number of edges (likely n_atom * n_neighbors)
        'atom' : (n_atom,) length list of atom classes
        'bond' : (n_bond,) list of bond classes. 0 for no bond
        'distance' : (n_bond,) list of bond distances
        'connectivity' : (n_bond, 2) array of source atom, target atom pairs.

        Raises
        RuntimeError if `mol` is None.

        """

        # Hopefully we've filtered out all problem mols by now. This guard
        # has to run before the first use of `mol`, otherwise it can never
        # trigger.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected (zero distances are excluded).
            n_bond = distance_matrix[(distance_matrix < self.cutoff) & (distance_matrix != 0)].size

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        # Rows that the loop does not reach keep their zero initial value.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # Get a list of the atoms in the molecule.
        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. The "+ 1" accounts for the atom itself, which sorts first
        # (distance 0) and is skipped below. Same for every atom, so computed
        # once.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(atoms):

            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            # Number of atoms (including this one) that lie within the cutoff.
            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Loop over each of the nearest neighbors; index 0 of the sorted
            # order is skipped as the atom itself.
            neighbor_inds = distance_matrix[n, :].argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor within reach: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))

                distance = distance_matrix[n, neighbor]
                bond_distance_matrix[bond_index] = distance

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
        }
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
class MolBPreprocessor(MolPreprocessor):
    """
    This is a subclass of MolPreprocessor that preprocesses molecules with a
    bond property target
    """
    def __init__(self, **kwargs):
        """
        A preprocessor class that also returns a bond index matrix, besides
        the bond matrix returned by MolPreprocessor. The bond index matrix is
        then used as ref to reduce molecule to bond property

        **kwargs are forwarded unchanged to MolPreprocessor.__init__.
        """
        super(MolBPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, entry):
        """
        Given an entry containing an rdkit molecule and the bond indices for
        the target property, return atom feature matrices, bond feature
        matrices, distance matrices, connectivity matrices and bond ref
        matrices.

        entry : (mol, bond_index_array)
            bond_index_array is a one-dimensional sequence of rdkit bond
            indices (numpy array or list).

        returns
        dict with entries
        see MolPreprocessor
        'n_pro' : len(bond_index_array)
        'bond_index' : (n_bond,) array; for each edge the position of its
            bond in bond_index_array, or -1 when the edge is not a listed bond

        Raises
        RuntimeError if the mol is None.
        """
        mol, bond_index_array = entry

        # Hopefully we've filtered out all problem mols by now. This guard
        # has to run before the first use of `mol`, otherwise it can never
        # trigger.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())
        n_pro = len(bond_index_array)

        # Converted once; the original re-ran .tolist() for every edge.
        target_bonds = np.asarray(bond_index_array).tolist()

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected (zero distances are excluded).
            n_bond = distance_matrix[(distance_matrix < self.cutoff) & (distance_matrix != 0)].size

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        bond_index_matrix = np.full(n_bond, -1, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # Get a list of the atoms in the molecule.
        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. Same for every atom, so computed once.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(atoms):
            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))

            # Number of atoms (including this one) that lie within the cutoff.
            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Loop over each of the nearest neighbors; index 0 of the sorted
            # order is skipped as the atom itself.
            neighbor_inds = distance_matrix[n, :].argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor within reach: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))
                    try:
                        bond_index_matrix[bond_index] = target_bonds.index(bond.GetIdx())
                    except ValueError:
                        # Bond is not one of the targets; keep the -1 marker.
                        pass

                distance = distance_matrix[n, neighbor]
                bond_distance_matrix[bond_index] = distance

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'n_pro': n_pro,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
            'bond_index': bond_index_matrix,
        }
|
| 536 |
+
|
| 537 |
+
class MolAPreprocessor(MolPreprocessor):
    """
    This is a subclass of MolPreprocessor that preprocesses molecules with an
    atom property target
    """
    def __init__(self, **kwargs):
        """
        A preprocessor class that also returns an atom index matrix, besides
        the matrices returned by MolPreprocessor. The atom index matrix is
        then used as ref to reduce molecule to atom property

        **kwargs are forwarded unchanged to MolPreprocessor.__init__.
        """
        super(MolAPreprocessor, self).__init__(**kwargs)

    def construct_feature_matrices(self, entry):
        """
        Given an entry containing an rdkit molecule and the atom indices for
        the target property, return atom feature matrices, bond feature
        matrices, distance matrices, connectivity matrices and atom ref
        matrices.

        entry : (mol, atom_index_array)
            atom_index_array is a one-dimensional sequence of rdkit atom
            indices (numpy array or list).

        returns
        dict with entries
        see MolPreprocessor
        'n_pro' : len(atom_index_array)
        'atom_index' : (n_atom,) array; for each atom its position in
            atom_index_array, or -1 when the atom is not a listed target

        Raises
        RuntimeError if the mol is None.
        """
        mol, atom_index_array = entry

        # Hopefully we've filtered out all problem mols by now. This guard
        # has to run before the first use of `mol`, otherwise it can never
        # trigger.
        if mol is None:
            raise RuntimeError("Issue in loading mol")

        n_atom = len(mol.GetAtoms())
        n_pro = len(atom_index_array)

        # Converted once; the original re-ran .tolist() for every atom.
        target_atoms = np.asarray(atom_index_array).tolist()

        # n_bond is actually the number of atom-atom pairs, so this is defined
        # by the number of neighbors for each atom.
        distance_matrix = Chem.Get3DDistanceMatrix(mol)

        if self.n_neighbors <= (n_atom - 1):
            n_bond = self.n_neighbors * n_atom
        else:
            # If there are fewer atoms than n_neighbors, all atoms within the
            # cutoff will be connected (zero distances are excluded).
            n_bond = distance_matrix[(distance_matrix < self.cutoff) & (distance_matrix != 0)].size

        if n_bond == 0:
            n_bond = 1

        # Initialize the matrices to be filled in during the following loop.
        atom_feature_matrix = np.zeros(n_atom, dtype='int')
        bond_feature_matrix = np.zeros(n_bond, dtype='int')
        bond_distance_matrix = np.zeros(n_bond, dtype=np.float32)
        atom_index_matrix = np.full(n_atom, -1, dtype='int')
        connectivity = np.zeros((n_bond, 2), dtype='int')

        # Get a list of the atoms in the molecule.
        atom_seq = mol.GetAtoms()
        atoms = [atom_seq[i] for i in range(n_atom)]

        # if n_neighbors is greater than total atoms, then each atom is a
        # neighbor. Same for every atom, so computed once.
        neighbor_end_index = min(self.n_neighbors + 1, n_atom)

        # Here we loop over each atom, and the inner loop iterates over each
        # neighbor of the current atom.
        bond_index = 0  # keep track of our current bond.
        for n, atom in enumerate(atoms):
            # update atom feature matrix
            atom_feature_matrix[n] = self.atom_tokenizer(
                self.atom_features(atom))
            try:
                atom_index_matrix[n] = target_atoms.index(atom.GetIdx())
            except ValueError:
                # Atom is not one of the targets; keep the -1 marker.
                pass

            # Number of atoms (including this one) that lie within the cutoff.
            distance_atom = distance_matrix[n, :]
            cutoff_end_index = distance_atom[distance_atom < self.cutoff].size

            end_index = min(neighbor_end_index, cutoff_end_index)

            # Loop over each of the nearest neighbors; index 0 of the sorted
            # order is skipped as the atom itself.
            neighbor_inds = distance_matrix[n, :].argsort()[1:end_index]
            if len(neighbor_inds) == 0:
                # No neighbor within reach: fall back to a self-link.
                neighbor_inds = [n]

            for neighbor in neighbor_inds:

                # update bond feature matrix
                bond = mol.GetBondBetweenAtoms(n, int(neighbor))
                if bond is None:
                    bond_feature_matrix[bond_index] = 0
                else:
                    rev = bond.GetBeginAtomIdx() != n
                    bond_feature_matrix[bond_index] = self.bond_tokenizer(
                        self.bond_features(bond, flipped=rev))

                distance = distance_matrix[n, neighbor]
                bond_distance_matrix[bond_index] = distance

                # update connectivity matrix
                connectivity[bond_index, 0] = n
                connectivity[bond_index, 1] = neighbor

                bond_index += 1

        return {
            'n_atom': n_atom,
            'n_bond': n_bond,
            'n_pro': n_pro,
            'atom': atom_feature_matrix,
            'bond': bond_feature_matrix,
            'distance': bond_distance_matrix,
            'connectivity': connectivity,
            'atom_index': atom_index_matrix,
        }
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
# TODO: rewrite this
|
| 655 |
+
# class LaplacianSmilesPreprocessor(SmilesPreprocessor):
|
| 656 |
+
# """ Extends the SmilesPreprocessor class to also return eigenvalues and
|
| 657 |
+
# eigenvectors of the graph laplacian matrix.
|
| 658 |
+
#
|
| 659 |
+
# Example:
|
| 660 |
+
# >>> preprocessor = SmilesPreprocessor(
|
| 661 |
+
# >>> max_atoms=55, max_bonds=62, max_degree=4, explicit_hs=False)
|
| 662 |
+
# >>> atom, connectivity, eigenvalues, eigenvectors = preprocessor.fit(
|
| 663 |
+
# data.smiles)
|
| 664 |
+
# """
|
| 665 |
+
#
|
| 666 |
+
# def preprocess(self, smiles_iterator, train=True):
|
| 667 |
+
#
|
| 668 |
+
# self.atom_tokenizer.train = train
|
| 669 |
+
# self.bond_tokenizer.train = train
|
| 670 |
+
#
|
| 671 |
+
# for smiles in tqdm(smiles_iterator):
|
| 672 |
+
# G = self._mol_to_nx(smiles)
|
| 673 |
+
# A = self._get_atom_feature_matrix(G)
|
| 674 |
+
# C = self._get_connectivity_matrix(G)
|
| 675 |
+
# W, V = self._get_laplacian_spectral_decomp(G)
|
| 676 |
+
# yield A, C, W, V
|
| 677 |
+
#
|
| 678 |
+
#
|
| 679 |
+
# def _get_laplacian_spectral_decomp(self, G):
|
| 680 |
+
# """ Return the eigenvalues and eigenvectors of the graph G, padded to
|
| 681 |
+
# `self.max_atoms`.
|
| 682 |
+
# """
|
| 683 |
+
#
|
| 684 |
+
# w0 = np.zeros((self.max_atoms, 1))
|
| 685 |
+
# v0 = np.zeros((self.max_atoms, self.max_atoms))
|
| 686 |
+
#
|
| 687 |
+
# w, v = eigh(nx.laplacian_matrix(G).todense())
|
| 688 |
+
#
|
| 689 |
+
# num_atoms = len(v)
|
| 690 |
+
#
|
| 691 |
+
# w0[:num_atoms, 0] = w
|
| 692 |
+
# v0[:num_atoms, :num_atoms] = v
|
| 693 |
+
#
|
| 694 |
+
# return w0, v0
|
| 695 |
+
#
|
| 696 |
+
#
|
| 697 |
+
# def fit(self, smiles_iterator):
|
| 698 |
+
# results = self._fit(smiles_iterator)
|
| 699 |
+
# return {'atom': results[0],
|
| 700 |
+
# 'connectivity': results[1],
|
| 701 |
+
# 'w': results[2],
|
| 702 |
+
# 'v': results[3]}
|
| 703 |
+
#
|
| 704 |
+
#
|
| 705 |
+
# def predict(self, smiles_iterator):
|
| 706 |
+
# results = self._predict(smiles_iterator)
|
| 707 |
+
# return {'atom': results[0],
|
| 708 |
+
# 'connectivity': results[1],
|
| 709 |
+
# 'w': results[2],
|
| 710 |
+
# 'v': results[3]}
|
| 711 |
+
|
| 712 |
+
|
| 713 |
+
def get_max_atom_bond_size(smiles_iterator, explicit_hs=True):
    """ Scan a set of input SMILES and report the largest molecule sizes.

    Each SMILES is parsed with RDKit (hydrogens are added first when
    `explicit_hs` is true). Returns a dict with 'max_atoms', the largest atom
    count seen, and 'max_bonds', twice the largest bond count seen. """

    largest_atom_count = 0
    largest_bond_count = 0

    for smi in tqdm(smiles_iterator):
        molecule = MolFromSmiles(smi)
        if explicit_hs:
            molecule = AddHs(molecule)

        atom_count = len(molecule.GetAtoms())
        if atom_count > largest_atom_count:
            largest_atom_count = atom_count

        bond_count = len(molecule.GetBonds())
        if bond_count > largest_bond_count:
            largest_bond_count = bond_count

    return {
        'max_atoms': largest_atom_count,
        'max_bonds': 2 * largest_bond_count,
    }
|
| 727 |
+
|
| 728 |
+
|
| 729 |
+
def canonicalize_smiles(smiles, isomeric=True, sanitize=True):
    """ Round-trip a SMILES string through RDKit and return the result.

    `sanitize` is passed to MolFromSmiles and `isomeric` to MolToSmiles (as
    isomericSmiles). Best effort: any exception raised along the way is
    swallowed and None is returned instead. """
    try:
        return MolToSmiles(MolFromSmiles(smiles, sanitize=sanitize),
                           isomericSmiles=isomeric)
    except Exception:
        return None
|
requirements.txt
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==2.0.3
|
| 2 |
+
Werkzeug==2.0.3
|
| 3 |
+
redis==3.5.3
|
| 4 |
+
gunicorn==20.1.0
|
| 5 |
+
|
| 6 |
+
# RDKit — last version with a Python 3.7 / manylinux amd64 wheel
|
| 7 |
+
rdkit-pypi==2022.3.5
|
| 8 |
+
|
| 9 |
+
# TF1 CPU — pinned to match the trained model
|
| 10 |
+
tensorflow-cpu==1.15.0
|
| 11 |
+
keras==2.2.5
|
| 12 |
+
h5py==2.10.0
|
| 13 |
+
|
| 14 |
+
# protobuf must be pinned — versions >=3.21 break TF1.15
|
| 15 |
+
protobuf==3.20.3
|
| 16 |
+
|
| 17 |
+
# Scientific stack — pinned for Python 3.7 / TF1 compatibility
|
| 18 |
+
numpy==1.16.6
|
| 19 |
+
pandas==0.25.3
|
| 20 |
+
scipy==1.3.3
|
| 21 |
+
scikit-learn==0.21.3
|
| 22 |
+
tqdm==4.64.1
|
| 23 |
+
|
| 24 |
+
# nfp is vendored locally — no PyPI entry needed
|
setup_assets.sh
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# CASCADE HF Space — Asset Setup Script
# Run once from your terminal: bash setup_assets.sh
#
# Copies the inference code, preprocessor pickle, model weights, vendored nfp
# package and static assets from the sibling NNPredictor checkout into the
# directory this script lives in.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
NOVA="$SCRIPT_DIR/../NNPredictor/webapp_NMR/Predictor"
HF="$SCRIPT_DIR"

echo "=== CASCADE HF Space Asset Setup ==="
echo "Source: $NOVA"
echo "Target: $HF"
echo ""

# Fail early with a readable message instead of a cryptic sed/cp error.
if [ ! -d "$NOVA" ]; then
    echo "ERROR: source directory not found: $NOVA" >&2
    exit 1
fi

# Create every destination directory up front so the redirect and the copies
# below also work on a fresh checkout.
mkdir -p "$HF/NMR_Prediction/schnet_edgeupdate" "$HF/nfp" "$HF/static"

# ── 1. Python inference code (with preprocessor path fix) ──────────────────
echo "[1/5] Copying inference code..."
# apply.py is rewritten on the fly so that it loads the pickle from
# NMR_Prediction/ instead of cascade/.
sed "s|os.path.join('cascade', 'preprocessor.p')|os.path.join('NMR_Prediction', 'preprocessor.p')|g" \
    "$NOVA/NMR_Prediction/apply.py" > "$HF/NMR_Prediction/apply.py"
cp "$NOVA/NMR_Prediction/genConf.py" "$HF/NMR_Prediction/genConf.py"
echo " apply.py ✓ (preprocessor path fixed)"
echo " genConf.py ✓"

# ── 2. Preprocessor pickle ─────────────────────────────────────────────────
echo "[2/5] Copying preprocessor..."
cp "$NOVA/NMR_Prediction/preprocessor.p" "$HF/NMR_Prediction/preprocessor.p"
echo " preprocessor.p ✓ ($(du -sh "$HF/NMR_Prediction/preprocessor.p" | cut -f1))"

# ── 3. Model weights ───────────────────────────────────────────────────────
echo "[3/5] Copying model weights..."
cp "$NOVA/NMR_Prediction/schnet_edgeupdate/best_model.hdf5" \
    "$HF/NMR_Prediction/schnet_edgeupdate/best_model.hdf5"
echo " best_model.hdf5 ✓ ($(du -sh "$HF/NMR_Prediction/schnet_edgeupdate/best_model.hdf5" | cut -f1))"

# ── 4. Vendored nfp package ────────────────────────────────────────────────
echo "[4/5] Copying nfp package..."
cp -r "$NOVA/nfp/." "$HF/nfp/"
echo " nfp/ ✓ ($(find "$HF/nfp" -type f | wc -l | tr -d ' ') files)"

# ── 5. Static assets ───────────────────────────────────────────────────────
echo "[5/5] Copying static assets..."
cp -r "$NOVA/NMR_Prediction/static/NMR_Prediction/." "$HF/static/"
echo " static/ ✓ ($(find "$HF/static" -type f | wc -l | tr -d ' ') files)"

echo ""
echo "=== Setup complete! ==="
echo "Total hf_space size: $(du -sh "$HF" | cut -f1)"
echo ""
echo "Next: docker build -t cascade-local . && docker run -p 7860:7860 cascade-local"
echo "Then: open http://localhost:7860/cascade_v1/predict/"
|
start.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Container entrypoint: start Redis, then the background CASCADE worker,
# then serve the Flask app with gunicorn on port 7860.
set -e

echo "Starting Redis..."
redis-server --daemonize yes --loglevel warning

# The daemonizing launcher may return before the server accepts connections.
# Poll briefly (up to ~5 s) so the worker's first connection does not race it.
# If redis-cli is unavailable or Redis never answers, continue anyway, exactly
# as the script did before this wait was added.
for ((attempt = 0; attempt < 50; attempt++)); do
    if redis-cli ping >/dev/null 2>&1; then
        break
    fi
    sleep 0.1
done

echo "Starting CASCADE worker..."
python worker.py &

echo "Starting Flask app..."
# exec replaces this shell with gunicorn so that signals sent to the
# container's main process (e.g. SIGTERM on shutdown) reach gunicorn directly
# instead of stopping at the wrapper shell.
exec gunicorn app:app \
    --bind 0.0.0.0:7860 \
    --workers 2 \
    --timeout 120 \
    --log-level info
|
static/JSmol.min.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
static/css/doc.css
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
html{
|
| 2 |
+
font-family: "Titillium", Cambria, Helvetica, Arial, Geneva, sans-serif !important;
|
| 3 |
+
padding: 20px 5% 20px 5% !important;
|
| 4 |
+
background: #29999c;
|
| 5 |
+
}
|
| 6 |
+
|
| 7 |
+
body {
|
| 8 |
+
clear: both;
|
| 9 |
+
box-shadow: 2px 2px 3px #444;
|
| 10 |
+
border-radius: 3px;
|
| 11 |
+
padding: 20px !important;
|
| 12 |
+
margin: 0px;
|
| 13 |
+
background: #f9f9f9;
|
| 14 |
+
min-width: 650px;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
table {
|
| 18 |
+
border-collapse: collapse;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
a, a:link, a:active{color:#29999C !important;}
|
| 22 |
+
a:visited{color:#55299C !important;}
|
| 23 |
+
a:hover{color:#3EB9BD !important;}
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
h1 {
|
| 27 |
+
text-align: center;
|
| 28 |
+
color: #29999c;
|
| 29 |
+
background: white;
|
| 30 |
+
border-radius: 3px 3px 0px 0px;
|
| 31 |
+
margin: -20px -20px 0px -20px;
|
| 32 |
+
border-bottom: 1px solid #888;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
h1, h1 a, h2 a{
|
| 36 |
+
font-family: "Titillium", Cambria, Helvetica, Arial, Geneva, sans-serif !important;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
.version {
|
| 40 |
+
font-family: sans-serif;
|
| 41 |
+
font-size: smaller;
|
| 42 |
+
float: right;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
pre {
|
| 46 |
+
padding: 30px;
|
| 47 |
+
font-family: courier;
|
| 48 |
+
max-width: 70vw;
|
| 49 |
+
overflow-x: auto;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
input {
|
| 53 |
+
border: 1px solid;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
iframe{
|
| 57 |
+
border:none;
|
| 58 |
+
padding: 0px;
|
| 59 |
+
margin: 0px;
|
| 60 |
+
|
| 61 |
+
box-shadow: 2px 2px 3px #888;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
select {
|
| 65 |
+
border: 1px solid;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
textarea {
|
| 69 |
+
font-family: monospace;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
#molsource-box{
|
| 74 |
+
min-width: 30%;
|
| 75 |
+
width: 600px;
|
| 76 |
+
height: 350px;
|
| 77 |
+
}
|
| 78 |
+
.sketcher-frame {
|
| 79 |
+
margin: -2px;
|
| 80 |
+
resize: none;
|
| 81 |
+
min-width: 370px;
|
| 82 |
+
min-height: 370px;
|
| 83 |
+
height: 100%;
|
| 84 |
+
width: 100%;
|
| 85 |
+
overflow: hidden;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
/* Box that can be resized in both directions: 510x510 by default, never
   smaller than 370x370, with no border and a transparent background. */
.resizable{
    /* A single border declaration: the earlier duplicate "border: none" was
       always overridden by this one. */
    border: 0px;
    padding: 0px 8px 8px 0px;
    resize: both;
    width: 510px;
    height: 510px;
    min-width: 370px;
    min-height: 370px;
    overflow: auto;
    background-color: transparent;
}
|
| 100 |
+
|
| 101 |
+
ul.horizontalmenu {
|
| 102 |
+
margin-top: 0px;
|
| 103 |
+
margin-bottom: 0px;
|
| 104 |
+
padding-top: 2px;
|
| 105 |
+
padding-bottom: 2px;
|
| 106 |
+
padding-left: 6px;
|
| 107 |
+
background-color: #dddddd;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
ul.horizontalmenu li {
|
| 111 |
+
display: inline;
|
| 112 |
+
list-style-type: none;
|
| 113 |
+
padding-right: 6px;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
input[type="checkbox"], input[type="button"] {
|
| 117 |
+
cursor: pointer;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
input[type="number"] {
|
| 121 |
+
width: 70px;
|
| 122 |
+
text-indent: 2px;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.table-layout {
|
| 126 |
+
vertical-align: top;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
.table-layout li {
|
| 130 |
+
display: table-row;
|
| 131 |
+
}
|
| 132 |
+
.table-layout li >* {
|
| 133 |
+
display: table-cell;
|
| 134 |
+
padding: 2px;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
label {
|
| 138 |
+
cursor: pointer;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.darkbox {
|
| 142 |
+
display: inline-block;
|
| 143 |
+
margin-left: 10px;
|
| 144 |
+
margin-right: 10px;
|
| 145 |
+
background-color: transparent;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.scrollablebox {
|
| 149 |
+
width: 90%;
|
| 150 |
+
height:350px;
|
| 151 |
+
white-space: nowrap;
|
| 152 |
+
overflow: scroll;
|
| 153 |
+
margin-bottom: 20px;
|
| 154 |
+
margin-top: 0px;
|
| 155 |
+
margin-left: 2px;
|
| 156 |
+
margin-right: 2px;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
#submitButton {
|
| 160 |
+
margin-top: -20px;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
td { vertical-align: top; }
|
| 164 |
+
li { list-style-type: none; }
|
| 165 |
+
|
| 166 |
+
.molecule-div {
|
| 167 |
+
background-color: #bababa;
|
| 168 |
+
margin-left: 20px;
|
| 169 |
+
margin-right: 0px;
|
| 170 |
+
font-size: 11px;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
div#serviceForm {
|
| 174 |
+
margin: 10px;
|
| 175 |
+
background-color: #ddd;
|
| 176 |
+
padding: 4px;
|
| 177 |
+
clear: none;
|
| 178 |
+
display: inline-block;
|
| 179 |
+
font-family: sans-serif;
|
| 180 |
+
font-size: 12px;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
div#serviceForm input {
|
| 184 |
+
border: none;
|
| 185 |
+
margin: 2px;
|
| 186 |
+
margin-left: 0px;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.editorheader {
|
| 190 |
+
float: left;
|
| 191 |
+
margin-left: 10px;
|
| 192 |
+
margin-right: 10px;
|
| 193 |
+
margin-top: 5px;
|
| 194 |
+
margin-bottom: 5px;
|
| 195 |
+
background-color: transparent;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.left10 {
|
| 199 |
+
margin-left: 10px;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
#convertStructureInputHeader {
|
| 203 |
+
display: inline-block;
|
| 204 |
+
margin-right: 10px;
|
| 205 |
+
background-color: #ddd;
|
| 206 |
+
font-size: 12px;
|
| 207 |
+
font-family: sans-serif;
|
| 208 |
+
margin-bottom: 20px;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.bordered {
|
| 212 |
+
box-shadow: 2px 2px 3px #444;
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
/* APIDOC style */
|
| 216 |
+
.rightSide {
|
| 217 |
+
float: right;
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
div.table {
|
| 221 |
+
border: 1px solid;
|
| 222 |
+
display: table;
|
| 223 |
+
width: 100%;
|
| 224 |
+
margin-bottom: 14px;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
div.api-header div.api-body {
|
| 228 |
+
display: table-row;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
.api-body, .api-header, div.inner-api-table {
|
| 232 |
+
padding-left: 1em;
|
| 233 |
+
padding-right: 1em;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
div.api-body p:last-of-type {
|
| 237 |
+
margin-bottom: 1em;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
.api-header {
|
| 241 |
+
background-color: #29999C;
|
| 242 |
+
color: #FFFFFF;
|
| 243 |
+
font-weight: bold;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
span.api-method-returntype:before {
|
| 247 |
+
content: "Returns: ";
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
div code {
|
| 251 |
+
background-color: inherit;
|
| 252 |
+
border: none;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
div.inner-api-table {
|
| 256 |
+
display: block;
|
| 257 |
+
background-color: lightblue;
|
| 258 |
+
border: none;
|
| 259 |
+
overflow-x: auto;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
/* categories.html and items.html */
|
| 263 |
+
/* Top padding for body cells of vertically-bordered tables. The selector was
   previously spelled "cell-borrder-vertical", which matches no element that
   uses the "cell-border-vertical" class styled by the following rules. */
table.cell-border-vertical tr > td {
    padding-top: 0.5em;
}
|
| 266 |
+
|
| 267 |
+
table.cell-border-vertical tr > td, table.cell-border-vertical tr > th {
|
| 268 |
+
border-left: 1px solid;
|
| 269 |
+
padding-left: 15px;
|
| 270 |
+
padding-right: 15px;
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
table.cell-border-vertical tr > td:first-of-type, table.cell-border-vertical tr > th:first-of-type {
|
| 274 |
+
border-left: none;
|
| 275 |
+
padding-left: initial;
|
| 276 |
+
}
|
static/images/cascadebanner.png
ADDED
|
static/images/custom-icon.png
ADDED
|
|
static/j2s/J/Jmol.properties
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Jmol.___JmolDate="$Date: 2018-11-28 19:07:11 -0600 (Wed, 28 Nov 2018) $"
|
| 2 |
+
Jmol.___fullJmolProperties="src/org/jmol/viewer/Jmol.properties"
|
| 3 |
+
Jmol.___JmolVersion="14.29.29"
|
static/j2s/J/adapter/readers/aflow/AFLOWReader.js
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.aflow");
|
| 2 |
+
Clazz.load (["J.adapter.readers.xtal.VaspPoscarReader", "java.util.Hashtable"], "J.adapter.readers.aflow.AFLOWReader", ["java.lang.Float", "java.util.Arrays", "JU.BS", "$.Lst", "$.PT", "$.SB", "JU.Logger"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
|
| 4 |
+
this.aabb = null;
|
| 5 |
+
this.readPRE = false;
|
| 6 |
+
this.fracB = NaN;
|
| 7 |
+
this.compositions = null;
|
| 8 |
+
this.getComposition = false;
|
| 9 |
+
this.listKey = null;
|
| 10 |
+
this.listKeyCase = null;
|
| 11 |
+
this.fileModelNumber = 0;
|
| 12 |
+
this.havePRE = false;
|
| 13 |
+
this.titleMsg = null;
|
| 14 |
+
this.keyMap = null;
|
| 15 |
+
Clazz.instantialize (this, arguments);
|
| 16 |
+
}, J.adapter.readers.aflow, "AFLOWReader", J.adapter.readers.xtal.VaspPoscarReader);
|
| 17 |
+
Clazz.prepareFields (c$, function () {
|
| 18 |
+
this.keyMap = new java.util.Hashtable ();
|
| 19 |
+
});
|
| 20 |
+
Clazz.overrideMethod (c$, "initializeReader",
|
| 21 |
+
function () {
|
| 22 |
+
this.readPRE = this.checkFilterKey ("PRE");
|
| 23 |
+
var s;
|
| 24 |
+
s = this.getFilter ("CA=");
|
| 25 |
+
if (s != null) this.fracB = (1 - this.parseFloatStr (s));
|
| 26 |
+
s = this.getFilter ("CB=");
|
| 27 |
+
if (s != null) this.fracB = this.parseFloatStr (s);
|
| 28 |
+
s = this.getFilter ("LIST=");
|
| 29 |
+
this.listKey = (s == null ? "HF" : s);
|
| 30 |
+
this.listKeyCase = this.listKey;
|
| 31 |
+
this.getComposition = !Float.isNaN (this.fracB);
|
| 32 |
+
this.discardLinesUntilStartsWith ("[");
|
| 33 |
+
this.aabb = this.line.substring (1, this.line.indexOf ("]"));
|
| 34 |
+
var pt = (JU.PT.isUpperCase (this.aabb.charAt (1)) ? 1 : 2);
|
| 35 |
+
this.defaultLabels = Clazz.newArray (-1, [this.aabb.substring (0, pt), this.aabb.substring (pt)]);
|
| 36 |
+
while (this.rd ().indexOf ("] REFERENCE:") >= 0) this.appendLoadNote (this.line);
|
| 37 |
+
|
| 38 |
+
this.compositions = new java.util.Hashtable ();
|
| 39 |
+
this.quiet = true;
|
| 40 |
+
this.asc.bsAtoms = new JU.BS ();
|
| 41 |
+
this.addJmolScript ("unitcell off;axes off;");
|
| 42 |
+
this.havePRE = (this.line.indexOf ("Structure PRE") >= 0);
|
| 43 |
+
});
|
| 44 |
+
Clazz.overrideMethod (c$, "checkLine", 
function () {
  // Advance to the next "Structure PRE" marker (unless initializeReader already
  // stopped on one), read one PRE/POST record, and report whether reading
  // should continue.
  if (!this.havePRE) this.discardLinesUntilContains ("Structure PRE");
  this.havePRE = false;
  if (this.line == null) return false;
  // Sample the flag before reading and combine with a non-short-circuit "&",
  // so readPrePost always runs, as in the original expression.
  var wasContinuing = this.continuing;
  var recordWantsMore = this.readPrePost ();
  this.continuing = new Boolean (wasContinuing & recordWantsMore).valueOf ();
  return this.continuing;
});
|
| 52 |
+
Clazz.defineMethod (c$, "readPrePost",
|
| 53 |
+
function () {
|
| 54 |
+
this.fileModelNumber++;
|
| 55 |
+
this.titleMsg = "#" + (this.modelNumber + 1) + (this.getComposition ? "," + this.fileModelNumber + ", Cb=" + this.fracB : "");
|
| 56 |
+
this.elementLabel = null;
|
| 57 |
+
var n0 = this.asc.bsAtoms.cardinality ();
|
| 58 |
+
if (this.readPRE) {
|
| 59 |
+
this.readStructure (this.titleMsg);
|
| 60 |
+
} else {
|
| 61 |
+
this.readElementLabelsOnly ();
|
| 62 |
+
this.discardLinesUntilContains ("Structure POST");
|
| 63 |
+
this.readStructure (this.titleMsg);
|
| 64 |
+
}if (this.getData ()) {
|
| 65 |
+
this.applySymmetryAndSetTrajectory ();
|
| 66 |
+
} else {
|
| 67 |
+
this.asc.bsAtoms.clearBits (this.asc.getLastAtomSetAtomIndex (), this.asc.ac);
|
| 68 |
+
this.doCheckUnitCell = false;
|
| 69 |
+
}this.finalizeModel ();
|
| 70 |
+
if (n0 != this.asc.bsAtoms.cardinality ()) JU.Logger.info ("AFLOW: file#, saved#, atoms: " + this.fileModelNumber + " " + this.modelNumber + " " + (this.asc.bsAtoms.cardinality () - n0));
|
| 71 |
+
return !this.haveModel || this.modelNumber != this.desiredModelNumber;
|
| 72 |
+
});
|
| 73 |
+
Clazz.defineMethod (c$, "finalizeModel",
|
| 74 |
+
function () {
|
| 75 |
+
this.asc.removeLastUnselectedAtoms ();
|
| 76 |
+
});
|
| 77 |
+
Clazz.defineMethod (c$, "readElementLabelsOnly",
|
| 78 |
+
function () {
|
| 79 |
+
this.readLines (5);
|
| 80 |
+
this.rdline ();
|
| 81 |
+
var n = this.getTokens ().length;
|
| 82 |
+
this.elementLabel = new Array (n);
|
| 83 |
+
this.rdline ();
|
| 84 |
+
this.line = "";
|
| 85 |
+
var s = null;
|
| 86 |
+
var last = null;
|
| 87 |
+
for (var i = 0; i < n; i++) {
|
| 88 |
+
while (s == null || s.equals (last)) {
|
| 89 |
+
this.rdline ();
|
| 90 |
+
var tokens = this.getTokens ();
|
| 91 |
+
if (tokens.length != 4 || (s = this.elementLabel[i] = this.getElement (tokens[3])) == null) {
|
| 92 |
+
i = n + 1;
|
| 93 |
+
break;
|
| 94 |
+
}}
|
| 95 |
+
last = s;
|
| 96 |
+
}
|
| 97 |
+
if (s == null) this.elementLabel = this.defaultLabels;
|
| 98 |
+
});
|
| 99 |
+
Clazz.defineMethod (c$, "getData",
|
| 100 |
+
function () {
|
| 101 |
+
this.discardLinesUntilContains ("- DATA -");
|
| 102 |
+
var htAFLOW = new java.util.Hashtable ();
|
| 103 |
+
htAFLOW.put ("fileModelNumber", Integer.$valueOf (this.fileModelNumber));
|
| 104 |
+
htAFLOW.put ("modelNumber", Integer.$valueOf (this.modelNumber + 1));
|
| 105 |
+
htAFLOW.put ("AaBb", this.aabb);
|
| 106 |
+
var pt = 0;
|
| 107 |
+
var sb = new JU.SB ();
|
| 108 |
+
var listVal = 3.4028235E38;
|
| 109 |
+
var strcb = "?";
|
| 110 |
+
var listValStr = null;
|
| 111 |
+
var cb = 0;
|
| 112 |
+
while (this.rdline () != null && (pt = this.line.indexOf (" # ")) >= 0) {
|
| 113 |
+
var key = this.line.substring (pt + 3).trim ();
|
| 114 |
+
var val = this.line.substring (0, pt).trim ();
|
| 115 |
+
sb.append (key).append ("=").append (val).append (" | ");
|
| 116 |
+
if (key.toUpperCase ().startsWith (this.listKey)) {
|
| 117 |
+
this.listKey = key.toUpperCase ();
|
| 118 |
+
this.listKeyCase = key;
|
| 119 |
+
listValStr = val;
|
| 120 |
+
listVal = this.parseFloatStr (val);
|
| 121 |
+
}if (key.equals ("Ca")) {
|
| 122 |
+
var ca = this.parseFloatStr (val);
|
| 123 |
+
if (this.getComposition && Math.abs ((1 - ca) - this.fracB) > 0.01) return false;
|
| 124 |
+
} else if (key.equals ("Cb")) {
|
| 125 |
+
cb = this.parseFloatStr (strcb = val);
|
| 126 |
+
if (this.getComposition && Math.abs (cb - this.fracB) > 0.01) return false;
|
| 127 |
+
} else if (key.equals ("Hf_atom [eV] (VASP)")) {
|
| 128 |
+
var e = this.parseFloatStr (val);
|
| 129 |
+
this.asc.setAtomSetEnergy (val, e);
|
| 130 |
+
}}
|
| 131 |
+
this.asc.setAtomSetName (this.titleMsg + (this.getComposition ? "" : " Cb=" + cb) + " " + this.listKey + "=" + listValStr);
|
| 132 |
+
var count_min = this.compositions.get (strcb);
|
| 133 |
+
if (!this.doGetModel (++this.modelNumber, null)) return false;
|
| 134 |
+
if (count_min == null) this.compositions.put (strcb, count_min = Clazz.newFloatArray (-1, [0, 3.4028235E38, 0]));
|
| 135 |
+
count_min[0]++;
|
| 136 |
+
if (listVal < count_min[1]) {
|
| 137 |
+
count_min[1] = listVal;
|
| 138 |
+
count_min[2] = this.fileModelNumber;
|
| 139 |
+
}while (this.line.indexOf ("- URL -") < 0) this.rdline ();
|
| 140 |
+
|
| 141 |
+
sb.append ("URL=" + this.rdline () + "|");
|
| 142 |
+
while (this.line.indexOf ("aurl=") < 0) this.rdline ();
|
| 143 |
+
|
| 144 |
+
sb.append (this.line);
|
| 145 |
+
var pairs = JU.PT.split (sb.toString (), " | ");
|
| 146 |
+
for (var i = pairs.length; --i >= 0; ) {
|
| 147 |
+
var kv = pairs[i].$plit ("=");
|
| 148 |
+
if (kv.length < 2) continue;
|
| 149 |
+
var f = this.parseFloatStr (kv[1]);
|
| 150 |
+
var o = Float.isNaN (f) ? kv[1] : Float.$valueOf (f);
|
| 151 |
+
htAFLOW.put (kv[0], o);
|
| 152 |
+
var kvclean = this.cleanKey (kv[0]);
|
| 153 |
+
if (kvclean !== kv[0]) htAFLOW.put (kvclean, o);
|
| 154 |
+
}
|
| 155 |
+
this.asc.setCurrentModelInfo ("aflowInfo", htAFLOW);
|
| 156 |
+
return true;
|
| 157 |
+
});
|
| 158 |
+
Clazz.defineMethod (c$, "cleanKey", 
function (key) {
  // Return a cleaned form of key in which every character that is not a
  // letter or digit is replaced by '_'. Results are memoized in this.keyMap.
  var cached = this.keyMap.get (key);
  if (cached != null) return cached;
  var chars = key.toCharArray ();
  for (var i = chars.length - 1; i >= 0; i--) {
    if (!JU.PT.isLetterOrDigit (chars[i])) chars[i] = '_';
  }
  // presumably JU.PT.rep collapses "__" to "_" and JU.PT.trim strips leading
  // and trailing '_' -- confirm against JU.PT
  var collapsed = JU.PT.rep ( String.instantialize (chars), "__", "_");
  var cleaned = JU.PT.trim (collapsed, "_");
  this.keyMap.put (key, cleaned);
  return cleaned;
}, "~S");
|
| 168 |
+
Clazz.overrideMethod (c$, "finalizeSubclassReader",
|
| 169 |
+
function () {
|
| 170 |
+
this.alignUnitCells ();
|
| 171 |
+
this.listCompositions ();
|
| 172 |
+
this.finalizeReaderASCR ();
|
| 173 |
+
});
|
| 174 |
+
Clazz.defineMethod (c$, "listCompositions", 
function () {
  // Emit one load note per entry of this.compositions, sorted as strings.
  // Each note is tab-separated: aabb, entry key, count_min[0] as int,
  // count_min[2] as int, listKeyCase, count_min[1].
  var rows = new JU.Lst ();
  var entries = this.compositions.entrySet ().iterator ();
  while (entries.hasNext ()) {
    var entry = entries.next ();
    var stats = entry.getValue ();
    var row = entry.getKey () + "\t" + (Clazz.floatToInt (stats[0])) + "\t" + Clazz.floatToInt (stats[2]) + "\t" + this.listKeyCase + "\t" + stats[1];
    rows.addLast (row);
  }
  var sorted = new Array (rows.size ());
  rows.toArray (sorted);
  java.util.Arrays.sort (sorted);
  var total = sorted.length;
  for (var k = 0; k < total; k++) {
    this.appendLoadNote (this.aabb + "\t" + sorted[k]);
  }
});
|
| 187 |
+
Clazz.defineMethod (c$, "alignUnitCells",
|
| 188 |
+
function () {
|
| 189 |
+
});
|
| 190 |
+
});
|
static/j2s/J/adapter/readers/cif/Cif2DataParser.js
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["JU.CifDataParser"], "J.adapter.readers.cif.Cif2DataParser", ["java.lang.Float", "java.util.Hashtable", "JU.Lst", "$.PT"], function () {
|
| 3 |
+
c$ = Clazz.declareType (J.adapter.readers.cif, "Cif2DataParser", JU.CifDataParser);
|
| 4 |
+
Clazz.overrideMethod (c$, "getVersion",
|
| 5 |
+
function () {
|
| 6 |
+
return 2;
|
| 7 |
+
});
|
| 8 |
+
Clazz.overrideMethod (c$, "toUnicode",
|
| 9 |
+
function (data) {
|
| 10 |
+
return data;
|
| 11 |
+
}, "~S");
|
| 12 |
+
Clazz.overrideMethod (c$, "isQuote",
|
| 13 |
+
function (ch) {
|
| 14 |
+
switch (ch) {
|
| 15 |
+
case '\1':
|
| 16 |
+
case '\'':
|
| 17 |
+
case '\"':
|
| 18 |
+
case '[':
|
| 19 |
+
case ']':
|
| 20 |
+
case '{':
|
| 21 |
+
case '}':
|
| 22 |
+
case ';':
|
| 23 |
+
return true;
|
| 24 |
+
}
|
| 25 |
+
return false;
|
| 26 |
+
}, "~S");
|
| 27 |
+
Clazz.overrideMethod (c$, "getQuotedStringOrObject",
|
| 28 |
+
function (ch) {
|
| 29 |
+
return this.processQuotedString ();
|
| 30 |
+
}, "~S");
|
| 31 |
+
Clazz.overrideMethod (c$, "preprocessString",
|
| 32 |
+
function () {
|
| 33 |
+
this.line = (this.ich == 0 ? this.str : this.str.substring (this.ich));
|
| 34 |
+
return this.setString (this.processSemiString ());
|
| 35 |
+
});
|
| 36 |
+
Clazz.defineMethod (c$, "processQuotedString",
|
| 37 |
+
function () {
|
| 38 |
+
var str = null;
|
| 39 |
+
var quoteChar = this.str.charAt (this.ich);
|
| 40 |
+
var tripleChar = null;
|
| 41 |
+
try {
|
| 42 |
+
switch (quoteChar) {
|
| 43 |
+
case '\1':
|
| 44 |
+
str = this.str.substring (1, (this.ich = this.str.indexOf ("\1", this.ich + 1)));
|
| 45 |
+
this.ich++;
|
| 46 |
+
break;
|
| 47 |
+
case '[':
|
| 48 |
+
return this.readList ();
|
| 49 |
+
case ']':
|
| 50 |
+
this.ich++;
|
| 51 |
+
return "]";
|
| 52 |
+
case '{':
|
| 53 |
+
return this.readTable ();
|
| 54 |
+
case '}':
|
| 55 |
+
this.ich++;
|
| 56 |
+
return "}";
|
| 57 |
+
case '\'':
|
| 58 |
+
case '"':
|
| 59 |
+
if (this.str.indexOf ("'''") == this.ich) tripleChar = "'''";
|
| 60 |
+
else if (this.str.indexOf ("\"\"\"") == this.ich) tripleChar = "\"\"\"";
|
| 61 |
+
var nchar = (tripleChar == null ? 1 : 3);
|
| 62 |
+
var pt = this.ich + nchar;
|
| 63 |
+
var pt1 = 0;
|
| 64 |
+
while ((pt1 = (tripleChar == null ? this.str.indexOf (quoteChar, pt) : this.str.indexOf (tripleChar, pt))) < 0) {
|
| 65 |
+
if (this.readLine () == null) break;
|
| 66 |
+
this.str += this.line;
|
| 67 |
+
}
|
| 68 |
+
this.ich = pt1 + nchar;
|
| 69 |
+
this.cch = this.str.length;
|
| 70 |
+
str = this.str.substring (pt, pt1);
|
| 71 |
+
break;
|
| 72 |
+
}
|
| 73 |
+
} catch (e) {
|
| 74 |
+
if (Clazz.exceptionOf (e, Exception)) {
|
| 75 |
+
System.out.println ("exception in Cif2DataParser ; " + e);
|
| 76 |
+
} else {
|
| 77 |
+
throw e;
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
return (this.cterm == '\0' || this.asObject ? str : JU.PT.esc (str));
|
| 81 |
+
});
|
| 82 |
+
Clazz.defineMethod (c$, "processSemiString",
|
| 83 |
+
function () {
|
| 84 |
+
var pt1;
|
| 85 |
+
var pt2;
|
| 86 |
+
var str = this.preprocessSemiString ();
|
| 87 |
+
if (str.indexOf (';') != 1 && (pt1 = str.indexOf ('\\')) > 1 && ((pt2 = str.indexOf ('\n')) > pt1 || pt2 < 0)) {
|
| 88 |
+
var prefix = str.substring (1, pt1);
|
| 89 |
+
str = JU.PT.rep (str, "\n" + prefix, "\n");
|
| 90 |
+
str = "\1" + str.substring (str.charAt (pt1 + 1) == '\\' ? pt1 + 1 : pt2 < 0 ? str.length - 1 : pt2 + 1);
|
| 91 |
+
}this.ich = 0;
|
| 92 |
+
return this.fixLineFolding (str);
|
| 93 |
+
});
|
| 94 |
+
Clazz.defineMethod (c$, "readList", 
function () {
  // Read a "[...]" list whose opening bracket is at this.ich. While reading,
  // the terminator is set to ']' and nullString to null; both are restored
  // afterwards. Returns a JU.Lst of token objects when this.asObject is set,
  // otherwise the text "[v1,v2,...]".
  this.ich++;
  var savedTerm = this.cterm;
  var savedNull = this.nullString;
  this.cterm = ']';
  this.nullString = null;
  var items = (this.asObject ? new JU.Lst () : null);
  var count = 0;
  var text = "";
  for (;;) {
    var token = (this.asObject ? this.getNextTokenObject () : this.getNextToken ());
    // Stop at end of input or at the closing bracket token.
    if (token == null || token.equals ("]")) break;
    if (this.asObject) {
      items.addLast (token);
      continue;
    }
    if (count > 0) text += ",";
    count++;
    text += token;
  }
  this.cterm = savedTerm;
  this.nullString = savedNull;
  if (this.asObject) return items;
  return "[" + text + "]";
});
|
| 117 |
+
Clazz.defineMethod (c$, "readTable", 
function () {
  // Read a "{key : value ...}" table whose opening brace is at this.ich. While
  // reading, the terminator is set to '}' and nullString to null; both are
  // restored afterwards. Returns a java.util.Hashtable when this.asObject is
  // set, otherwise the text "{k1 : v1,k2 : v2,...}".
  this.ich++;
  var savedTerm = this.cterm;
  var savedNull = this.nullString;
  this.cterm = '}';
  this.nullString = null;
  var table = (this.asObject ? new java.util.Hashtable () : null);
  var count = 0;
  var text = "";
  for (;;) {
    var key = this.getNextToken ();
    // Stop at end of input or at the closing brace token.
    if (key == null || key.equals ("}")) break;
    // Skip the separator between key and value.
    while (this.isSpaceOrColon (this.ich)) this.ich++;
    if (this.asObject) {
      table.put (key, this.getNextTokenObject ());
      continue;
    }
    if (count > 0) text += ",";
    count++;
    text += key + " : " + this.getNextToken ();
  }
  this.cterm = savedTerm;
  this.nullString = savedNull;
  if (this.asObject) return table;
  return "{" + text + "}";
});
|
| 142 |
+
Clazz.defineMethod (c$, "isSpaceOrColon", 
function (ich) {
  // True when ich is below this.cch and the character of this.line at that
  // index is a space, tab, newline or colon.
  // NOTE(review): the bound is this.cch but the character is read from
  // this.line rather than this.str -- confirm the two always agree here.
  if (!(ich < this.cch)) return false;
  var c = this.line.charAt (ich);
  return (c === ' ' || c === '\t' || c === '\n' || c === ':');
}, "~N");
|
| 153 |
+
Clazz.overrideMethod (c$, "unquoted",
|
| 154 |
+
function (s) {
|
| 155 |
+
if (this.cterm == '\0' && !this.asObject) return s;
|
| 156 |
+
var n = s.length;
|
| 157 |
+
if (n > 0) {
|
| 158 |
+
var c = s.charAt (0);
|
| 159 |
+
if (JU.PT.isDigit (c) || c == '-' || c == '.' && n > 1) {
|
| 160 |
+
var pt = s.indexOf ('(');
|
| 161 |
+
var isFloat = (s.indexOf (".") >= 0);
|
| 162 |
+
if (n > 1 && pt > 0 && s.indexOf (')', pt + 1) == n - 1) s = s.substring (0, pt);
|
| 163 |
+
try {
|
| 164 |
+
if (isFloat) {
|
| 165 |
+
var f = Float.parseFloat (s);
|
| 166 |
+
if (this.asObject) return Float.$valueOf (f);
|
| 167 |
+
s = "" + f;
|
| 168 |
+
if (s.indexOf (".") < 0 && s.indexOf ("E") < 0) s += ".0";
|
| 169 |
+
return s;
|
| 170 |
+
}var i = Integer.parseInt (s);
|
| 171 |
+
return (this.asObject ? Integer.$valueOf (i) : "" + i);
|
| 172 |
+
} catch (e) {
|
| 173 |
+
}
|
| 174 |
+
}}return (this.asObject ? s : JU.PT.esc (s));
|
| 175 |
+
}, "~S");
|
| 176 |
+
Clazz.defineMethod (c$, "fixLineFolding",
|
| 177 |
+
function (str) {
|
| 178 |
+
if (str.indexOf ('\\') < 0) return str;
|
| 179 |
+
var n = str.length;
|
| 180 |
+
if (str.endsWith ("\\\1")) str = str.substring (0, n - 1) + "\n\1";
|
| 181 |
+
var pt = 0;
|
| 182 |
+
while ((pt = str.indexOf ('\\', pt + 1)) >= 0) {
|
| 183 |
+
var eol = str.indexOf ('\n', pt);
|
| 184 |
+
if (eol < 0) break;
|
| 185 |
+
for (var i = eol; --i > pt; ) {
|
| 186 |
+
var ch = str.charAt (i);
|
| 187 |
+
if (!JU.PT.isWhitespace (ch)) {
|
| 188 |
+
if (ch == '\\') {
|
| 189 |
+
pt = i;
|
| 190 |
+
break;
|
| 191 |
+
}pt = eol;
|
| 192 |
+
break;
|
| 193 |
+
}}
|
| 194 |
+
if (pt < eol) str = str.substring (0, pt) + str.substring (eol + 1);
|
| 195 |
+
}
|
| 196 |
+
return str;
|
| 197 |
+
}, "~S");
|
| 198 |
+
c$.getArrayFromStringList = Clazz.defineMethod (c$, "getArrayFromStringList", 
function (s, n) {
  // Parse up to n numbers out of list text such as "[a,b,c]" and return them
  // in a new double array of length n. Commas and '[' are blanked out before
  // tokenizing; values pass through a float array first, as in the original.
  var spaced = s.$replace (',', ' ').$replace ('[', ' ');
  var floats = Clazz.newFloatArray (n, 0);
  JU.PT.parseFloatArrayInfested (JU.PT.getTokens (spaced), floats);
  var doubles = Clazz.newDoubleArray (n, 0);
  for (var k = 0; k < n; k++) {
    doubles[k] = floats[k];
  }
  return doubles;
}, "~S,~N");
|
| 207 |
+
});
|
static/j2s/J/adapter/readers/cif/Cif2Reader.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
Clazz.load (["J.adapter.readers.cif.CifReader"], "J.adapter.readers.cif.Cif2Reader", ["J.adapter.readers.cif.Cif2DataParser"], function () {
// Cif2Reader: a CifReader subclass whose only override supplies a
// Cif2DataParser as the data parser.
c$ = Clazz.declareType (J.adapter.readers.cif, "Cif2Reader", J.adapter.readers.cif.CifReader);
Clazz.overrideMethod (c$, "getCifDataParser", 
function () {
  // Build a fresh CIF2 parser bound to this reader and its debugging flag.
  var parser = new J.adapter.readers.cif.Cif2DataParser ();
  return parser.set (this, null, this.debugging);
});
});
|
static/j2s/J/adapter/readers/cif/CifReader.js
ADDED
|
@@ -0,0 +1,1226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.AtomSetCollectionReader", "JU.Lst", "$.P3"], "J.adapter.readers.cif.CifReader", ["java.lang.Boolean", "$.Character", "$.Float", "java.util.Hashtable", "JU.BS", "$.CifDataParser", "$.PT", "$.Rdr", "$.V3", "J.adapter.smarter.Atom", "J.api.JmolAdapter", "JU.Logger", "$.Vibration"], function () {
|
| 3 |
+
// CifReader class declaration: the function below assigns the default value
// of every instance field (declaration order preserved), then hands off to
// Clazz.instantialize. Fields left null here that need objects are filled in
// by Clazz.prepareFields.
c$ = Clazz.decorateAsClass(function () {
  // --- helper objects and filter flags ---
  this.modr = null;
  this.parser = null;
  this.isAFLOW = false;
  this.filterAssembly = false;
  this.allowRotations = true;
  this.readIdeal = true;
  this.configurationPtr = -2147483648; // -2^31
  this.useAuthorChainID = true;

  // --- names and formulas ---
  this.thisDataSetName = "";
  this.lastDataSetName = null;
  this.chemicalName = "";
  this.thisStructuralFormula = "";
  this.thisFormula = "";

  // --- file-type and model state ---
  this.iHaveDesiredModel = false;
  this.isMMCIF = false;
  this.isLigand = false;
  this.isMagCIF = false;
  this.haveHAtoms = false;
  this.molecularType = "GEOM_BOND default";
  this.lastAltLoc = '\0';
  this.haveAromatic = false;
  this.conformationIndex = 0;
  this.nMolecular = 0;
  this.appendedData = null;
  this.skipping = false;
  this.nAtoms = 0;
  this.ac = 0;
  this.auditBlockCode = null;
  this.lastSpaceGroupName = null;
  this.modulated = false;
  this.isCourseGrained = false;
  this.haveCellWaveVector = false;
  this.$latticeType = null;
  this.htGroup1 = null;
  this.nAtoms0 = 0;
  this.titleAtomSet = 1;
  this.intTableNo = 0;
  this.htCellTypes = null;
  this.modelMap = null;
  this.htAudit = null;
  this.symops = null;
  this.pdbID = null;

  // --- current token / loop parsing state ---
  this.key = null;
  this.key0 = null;
  this.data = null;
  this.isLoop = false;
  this.col2key = null;
  this.key2col = null;
  this.field = null;
  this.firstChar = '\0';

  // --- atoms, bonds, lattice and molecule bookkeeping ---
  this.htOxStates = null;
  this.bondTypes = null;
  this.disorderAssembly = ".";
  this.lastDisorderAssembly = null;
  this.lattvecs = null;
  this.magCenterings = null;
  this.maxSerial = 0;
  this.atomRadius = null;
  this.bsConnected = null;
  this.bsSets = null;
  this.ptOffset = null;
  this.bsMolecule = null;
  this.bsExclude = null;
  this.firstAtom = 0;
  this.atoms = null;
  this.bsBondDuplicates = null;

  Clazz.instantialize(this, arguments);
}, J.adapter.readers.cif, "CifReader", J.adapter.smarter.AtomSetCollectionReader);
|
| 72 |
+
// Allocates the object-valued fields of a new CifReader: two 100-element
// int arrays for the column/key maps, an empty bond-type list and a point
// for ptOffset.
Clazz.prepareFields(c$, function () {
  this.col2key = Clazz.newIntArray(100, 0);
  this.key2col = Clazz.newIntArray(100, 0);
  this.bondTypes = new JU.Lst();
  this.ptOffset = new JU.P3();
});
|
| 78 |
+
/**
 * Reader entry point: applies the load filter options, then (unless a
 * binary document is attached) reads the whole CIF file and clears
 * this.continuing.
 */
Clazz.overrideMethod(c$, "initializeReader",
function () {
  this.initSubclass();
  this.allowPDBFilter = true;
  this.appendedData = this.htParams.get("appendedData");

  // "CONF n" filter selects a configuration pointer.
  var confFilter = this.getFilter("CONF ");
  if (confFilter != null) {
    this.configurationPtr = this.parseIntStr(confFilter);
  }

  // Filter keys map directly onto reader flags.
  this.isMolecular = this.checkFilterKey("MOLECUL") && !this.checkFilterKey("BIOMOLECULE");
  this.isPrimitive = this.checkFilterKey("PRIMITIVE");
  this.readIdeal = !this.checkFilterKey("NOIDEAL");
  this.filterAssembly = this.checkFilterKey("$");
  this.useAuthorChainID = !this.checkFilterKey("NOAUTHORCHAINS");
  if (this.isMolecular) {
    this.forceSymmetry(false);
    this.molecularType = "filter \"MOLECULAR\"";
  }
  this.asc.checkSpecial = !this.checkFilterKey("NOSPECIAL");
  this.allowRotations = !this.checkFilterKey("NOSYM");

  // A supercell string containing a comma is registered as the
  // "conventional" cell type.
  var supercell = this.strSupercell;
  if (supercell != null && supercell.indexOf(",") >= 0) {
    this.addCellType("conventional", supercell, true);
  }

  // With a binary document attached, text parsing is not done here.
  if (this.binaryDoc != null) return;

  this.readCifData();
  this.continuing = false;
});
|
| 100 |
+
// Hook called first by initializeReader; the base implementation is
// intentionally empty.
Clazz.defineMethod(c$, "initSubclass",
function () {
  // no-op in CifReader
});
|
| 103 |
+
/**
 * Reads every token of the file through readAllData, then repeats the same
 * loop over this.appendedData (when present) using a fresh JU.CifDataParser.
 * Each loop stops at end of input or as soon as readAllData returns false.
 */
Clazz.defineMethod(c$, "readCifData",
function () {
  this.parser = this.getCifDataParser();
  this.line = "";
  while ((this.key = this.parser.peekToken()) != null && this.readAllData()) {
    // all work is done in the loop condition
  }

  if (this.appendedData == null) return;

  // Second pass: data appended to the load request.
  var appendedReader = JU.Rdr.getBR(this.appendedData);
  this.parser = (this.getInterface("JU.CifDataParser")).set(null, appendedReader, this.debugging);
  while ((this.key = this.parser.peekToken()) != null && this.readAllData()) {
    // all work is done in the loop condition
  }
});
|
| 114 |
+
// Creates the token parser for this reader: a JU.CifDataParser bound to
// this reader, with no separate buffered reader, honoring the debugging flag.
Clazz.defineMethod(c$, "getCifDataParser",
function () {
  var cifParser = new JU.CifDataParser();
  return cifParser.set(this, null, this.debugging);
});
|
| 118 |
+
/**
 * Handles the token currently peeked into this.key: a data_ block header,
 * a loop_ block, or a single key/value entry, which is dispatched by key
 * name to the matching process* method.
 *
 * @return false only when a new data_ block starts after the desired model
 *         has already been read; true otherwise (continue reading)
 */
Clazz.defineMethod(c$, "readAllData",
function () {
  // ---- data_xxx : start of a new data block ----
  if (this.key.startsWith("data_")) {
    this.isLigand = false;
    if (this.asc.atomSetCount == 0) this.iHaveDesiredModel = false;
    if (this.iHaveDesiredModel) return false;
    if (this.desiredModelNumber != -2147483648) this.appendLoadNote(null);
    this.newModel(++this.modelNumber);
    this.haveCellWaveVector = false;
    if (this.auditBlockCode == null) this.modulated = false;
    if (!this.skipping) {
      this.nAtoms0 = this.asc.ac;
      this.processDataParameter();
      this.nAtoms = this.asc.ac;
    }
    return true;
  }

  // An audit block code ends skipping so that the entry is processed below.
  if (this.skipping && this.key.equals("_audit_block_code")) {
    this.iHaveDesiredModel = false;
    this.skipping = false;
  }

  // ---- loop_ ----
  this.isLoop = this.key.startsWith("loop_");
  if (this.isLoop) {
    if (this.skipping && !this.isMMCIF) {
      this.parser.getTokenPeeked();
      this.parser.skipLoop(false);
    } else {
      this.processLoopBlock();
    }
    return true;
  }

  // ---- single entry ----
  if (this.key.indexOf("_") != 0) {
    JU.Logger.warn(this.key.startsWith("save_") ? "CIF reader ignoring save_" : "CIF ERROR ? should be an underscore: " + this.key);
    this.parser.getTokenPeeked();
    // NOTE(review): execution continues into the dispatch below with
    // whatever this.data held before -- confirm this is intended.
  } else if (!this.getData()) {
    return true;
  }

  if (this.skipping) return true;

  this.key = this.parser.fixKey(this.key0 = this.key);
  var k = this.key;

  if (k.startsWith("_chemical_name") || k.equals("_chem_comp_name")) {
    this.processChemicalInfo("name");
  } else if (k.startsWith("_chemical_formula_structural")) {
    this.processChemicalInfo("structuralFormula");
  } else if (k.startsWith("_chemical_formula_sum") || k.equals("_chem_comp_formula")) {
    this.processChemicalInfo("formula");
  } else if (k.equals("_cell_modulation_dimension")) {
    this.modDim = this.parseIntStr(this.data);
  } else if (k.startsWith("_cell_") && k.indexOf("_commen_") < 0) {
    this.processCellParameter();
  } else if (k.startsWith("_atom_sites_fract_tran")) {
    this.processUnitCellTransformMatrix();
  } else if (k.startsWith("_audit")) {
    if (k.equals("_audit_block_code")) {
      this.auditBlockCode = this.parser.fullTrim(this.data).toUpperCase();
      this.appendLoadNote(this.auditBlockCode);
      if (this.htAudit != null && this.auditBlockCode.contains("_MOD_")) {
        // Reuse the symmetry stored for the matching _REFRNCE_ block.
        var refKey = JU.PT.rep(this.auditBlockCode, "_MOD_", "_REFRNCE_");
        if (this.asc.setSymmetry(this.htAudit.get(refKey)) != null) {
          this.unitCellParams = this.asc.getSymmetry().getUnitCellParams();
          this.iHaveUnitCell = true;
        }
      } else if (this.htAudit != null) {
        // Re-apply the symmetry operators collected so far.
        if (this.symops != null) {
          for (var n = 0; n < this.symops.size(); n++) {
            this.setSymmetryOperator(this.symops.get(n));
          }
        }
      }
      if (this.lastSpaceGroupName != null) this.setSpaceGroupName(this.lastSpaceGroupName);
    } else if (k.equals("_audit_creation_date")) {
      this.symmetry = null;
    }
  } else if (k.equals(J.adapter.readers.cif.CifReader.singleAtomID)) {
    this.readSingleAtom();
  } else if (k.startsWith("_symmetry_space_group_name_h-m") || k.startsWith("_symmetry_space_group_name_hall") || k.startsWith("_space_group_name") || k.contains("_ssg_name") || k.contains("_magn_name") || k.contains("_bns_name")) {
    this.processSymmetrySpaceGroupName();
  } else if (k.startsWith("_space_group_transform") || k.startsWith("_parent_space_group") || k.startsWith("_space_group_magn_transform")) {
    this.processUnitCellTransform();
  } else if (k.contains("_database_code")) {
    this.addModelTitle("ID");
  } else if ("__citation_title__publ_section_title__active_magnetic_irreps_details__".contains("_" + k + "__")) {
    this.addModelTitle("TITLE");
  } else if (k.startsWith("_aflow_")) {
    this.isAFLOW = true;
  } else if (k.equals("_symmetry_int_tables_number")) {
    this.intTableNo = this.parseIntStr(this.data);
    this.rotateHexCell = (this.isAFLOW && (this.intTableNo >= 143 && this.intTableNo <= 194));
  } else if (k.equals("_entry_id")) {
    this.pdbID = this.data;
  } else {
    this.processSubclassEntry();
  }
  return true;
});
|
| 199 |
+
/**
 * Appends "<key>: <trimmed this.data>" to the load note. When the atom set
 * count has grown past this.titleAtomSet, a "MODEL: n" line is written
 * first and titleAtomSet is updated to that count.
 *
 * @param key label written in front of the value (e.g. "ID", "TITLE")
 */
Clazz.defineMethod(c$, "addModelTitle",
function (key) {
  var setCount = this.asc.atomSetCount;
  if (setCount > this.titleAtomSet) {
    this.titleAtomSet = setCount;
    this.appendLoadNote("\nMODEL: " + setCount);
  }
  var value = this.parser.fullTrim(this.data);
  this.appendLoadNote(key + ": " + value);
}, "~S");
|
| 204 |
+
// Fallback for entries readAllData does not recognise: when a modulation
// dimension is set, the entry is forwarded to the modulation reader;
// otherwise it is ignored.
Clazz.defineMethod(c$, "processSubclassEntry",
function () {
  if (this.modDim > 0) {
    var modReader = this.getModulationReader();
    modReader.processEntry();
  }
});
|
| 208 |
+
/**
 * Handles a unit-cell transform entry: strips all spaces from this.data,
 * registers it as the "parent" or "standard" cell type depending on the
 * key, and records "<key>: <data>" in the load note.
 */
Clazz.defineMethod(c$, "processUnitCellTransform",
function () {
  this.data = JU.PT.replaceAllCharacters(this.data, " ", "");

  var name = this.key;
  if (name.contains("_from_parent") || name.contains("child_transform")) {
    this.addCellType("parent", this.data, true);
  } else if (name.contains("_to_standard") || name.contains("transform_bns_pp_abc")) {
    this.addCellType("standard", this.data, false);
  }

  this.appendLoadNote(this.key + ": " + this.data);
});
|
| 215 |
+
/**
 * Stores a cell transform under the given type name in this.htCellTypes
 * (created on first use). A stored value is prefixed with "!" when isFrom
 * is true; a leading "!" on the incoming data is removed and flips isFrom.
 * When the type matches this.strSupercell (case-insensitively), strSupercell
 * is replaced by the stored value and the oppositely-prefixed value is
 * stored as "conventional".
 *
 * @param type   cell type name used as the table key
 * @param data   transform text, optionally starting with "!"
 * @param isFrom whether the stored value gets the "!" prefix
 */
Clazz.defineMethod(c$, "addCellType",
function (type, data, isFrom) {
  if (this.htCellTypes == null) {
    this.htCellTypes = new java.util.Hashtable();
  }

  // A leading "!" toggles the direction and is not kept in the text.
  if (data.startsWith("!")) {
    isFrom = !isFrom;
    data = data.substring(1);
  }

  var prefix = (isFrom ? "!" : "");
  var cell = prefix + data;
  this.htCellTypes.put(type, cell);

  if (!type.equalsIgnoreCase(this.strSupercell)) return;

  this.strSupercell = cell;
  var inversePrefix = (isFrom ? "" : "!");
  this.htCellTypes.put("conventional", inversePrefix + data);
}, "~S,~S,~B");
|
| 227 |
+
// Adds one atom at the origin, named from the trimmed current data value,
// to the atom set collection.
Clazz.defineMethod(c$, "readSingleAtom",
function () {
  var single = new J.adapter.smarter.Atom();
  single.set(0, 0, 0);
  single.atomName = this.parser.fullTrim(this.data);
  // Return value is not used here; called as in the original.
  single.getElementSymbol();
  this.asc.addAtom(single);
});
|
| 235 |
+
// Returns the modulation reader, creating and initializing it through
// initializeMSCIF on first use.
Clazz.defineMethod(c$, "getModulationReader",
function () {
  if (this.modr == null) {
    return this.initializeMSCIF();
  }
  return this.modr;
});
|
| 239 |
+
/**
 * Ensures the MSCifParser exists (stored in both this.modr and this.ms),
 * initializes it with the current modulation dimension, and sets
 * this.modulated according to whether initialize returned a positive value.
 *
 * @return the modulation reader
 */
Clazz.defineMethod(c$, "initializeMSCIF",
function () {
  if (this.modr == null) {
    this.ms = this.modr = this.getInterface("J.adapter.readers.cif.MSCifParser");
  }
  var initResult = this.modr.initialize(this, this.modDim);
  this.modulated = (initResult > 0);
  return this.modr;
});
|
| 245 |
+
/**
 * Starts model number modelNo. If the model is not wanted, the reader goes
 * into skipping mode (consuming the peeked token unless reading mmCIF).
 * Otherwise the per-model text fields are reset and the previous model is
 * closed out: removed when it gained no atoms, finalized with symmetry
 * otherwise.
 *
 * @param modelNo the new model number
 */
Clazz.defineMethod(c$, "newModel",
function (modelNo) {
  this.modelNumber = modelNo;
  this.skipping = !this.doGetModel(modelNo, null);
  if (this.skipping) {
    if (!this.isMMCIF) this.parser.getTokenPeeked();
    return;
  }

  this.chemicalName = "";
  this.thisStructuralFormula = "";
  this.thisFormula = "";
  this.iHaveDesiredModel = this.isLastModel(this.modelNumber);
  if (this.isCourseGrained) {
    this.asc.setCurrentModelInfo("courseGrained", Boolean.TRUE);
  }

  var previousModelIsEmpty = (this.nAtoms0 == this.asc.ac);
  if (previousModelIsEmpty) {
    // No atoms were added since the last data block: discard that set.
    this.modelNumber--;
    this.haveModel = false;
    this.asc.removeCurrentAtomSet();
  } else {
    this.applySymmetryAndSetTrajectory();
  }
}, "~N");
|
| 263 |
+
/**
 * End-of-file processing: drops a trailing empty atom set or applies
 * symmetry to the last one, names multi-model collections, records the
 * PDB id, runs the shared finalizer, adds the file header to the load note
 * and requests aromatic-bond calculation when needed.
 */
Clazz.overrideMethod(c$, "finalizeSubclassReader",
function () {
  var lastSetIsEmpty = this.asc.iSet > 0 && this.asc.getAtomSetAtomCount(this.asc.iSet) == 0;
  if (lastSetIsEmpty) {
    this.asc.atomSetCount--;
  } else if (!this.isMMCIF || !this.finalizeSubclass()) {
    this.applySymmetryAndSetTrajectory();
  }

  var modelCount = this.asc.atomSetCount;
  if (modelCount > 1) {
    this.asc.setCollectionName("<collection of " + modelCount + " models>");
  }
  if (this.pdbID != null) {
    this.asc.setCurrentModelInfo("pdbID", this.pdbID);
  }

  this.finalizeReaderASCR();
  this.addHeader();
  if (this.haveAromatic) {
    this.addJmolScript("calculate aromatic");
  }
});
|
| 274 |
+
/**
 * When the parser reports a non-empty file header, rebuilds the load note
 * so that the header comes first and the previous note follows, and stores
 * the header under "fileHeader".
 */
Clazz.defineMethod(c$, "addHeader",
function () {
  var header = this.parser.getFileHeader();
  if (header.length <= 0) return;

  var previousNote = this.setLoadNote();
  this.appendLoadNote(null);
  this.appendLoadNote(header);
  this.appendLoadNote(previousNote);
  this.setLoadNote();
  this.asc.setInfo("fileHeader", header);
});
|
| 285 |
+
// Hook consulted by finalizeSubclassReader for mmCIF files; the base
// implementation always returns false.
Clazz.defineMethod(c$, "finalizeSubclass",
function () {
  var handled = false;
  return handled;
});
|
| 289 |
+
/**
 * Work done before symmetry is applied: adds lattice vectors when magnetic
 * centerings are pending, sets up modulation when a modulation dimension
 * is present, and for magCIF data scales the fractional vibrations and
 * marks them as fractional.
 */
Clazz.overrideMethod(c$, "doPreSymmetry",
function () {
  if (this.magCenterings != null) {
    this.addLatticeVectors();
  }
  if (this.modDim > 0) {
    this.getModulationReader().setModulation(false, null);
  }
  if (!this.isMagCIF) return;

  this.asc.getXSymmetry().scaleFractionalVibs();
  this.vibsFractional = true;
});
|
| 297 |
+
/**
 * Finalizes the current model: records mmCIF CONECT bookkeeping, publishes
 * any registered cell types as "unitcell_<type>" model info, fractionalizes
 * coordinates if needed, runs the shared symmetry/trajectory step, and then
 * sets bonding and molecules for non-mmCIF files that have bond types or
 * are molecular.
 */
Clazz.overrideMethod(c$, "applySymmetryAndSetTrajectory",
function () {
  var doCheckBonding = this.doCheckUnitCell && !this.isMMCIF;

  if (this.isMMCIF) {
    this.asc.checkSpecial = false;
    var modelIndex = this.asc.iSet;
    var conectInfo = Clazz.newIntArray(-1, [this.asc.getAtomSetAtomIndex(modelIndex), this.asc.getAtomSetAtomCount(modelIndex), this.maxSerial]);
    this.asc.setCurrentModelInfo("PDB_CONECT_firstAtom_count_max", conectInfo);
  }

  if (this.htCellTypes != null) {
    // Publish each registered cell type, then clear the table.
    var entries = this.htCellTypes.entrySet().iterator();
    while (entries.hasNext()) {
      var entry = entries.next();
      this.asc.setCurrentModelInfo("unitcell_" + entry.getKey(), entry.getValue());
    }
    this.htCellTypes = null;
  }

  // Without a cell wave vector there is no modulation to apply.
  if (!this.haveCellWaveVector) this.modDim = 0;

  if (this.doApplySymmetry && !this.iHaveFractionalCoordinates) {
    this.fractionalizeCoordinates(true);
  }
  this.applySymTrajASCR();

  if (doCheckBonding && (this.bondTypes.size() > 0 || this.isMolecular)) {
    this.setBondingAndMolecules();
  }
  this.asc.setCurrentModelInfo("fileHasUnitCell", Boolean.TRUE);
  this.asc.xtalSymmetry = null;
});
|
| 315 |
+
/**
 * Post-symmetry processing. Uses the base symmetry (when haveSymmetry is
 * true and it has a space group) to finalize modulation, set up magnetic
 * spin vectors for magCIF data, and remember the symmetry of audit blocks
 * whose code contains "REFRNCE".
 *
 * @param haveSymmetry whether symmetry was applied
 */
Clazz.overrideMethod(c$, "finalizeSubclassSymmetry",
function (haveSymmetry) {
  var sym = null;
  if (haveSymmetry) {
    sym = this.asc.getXSymmetry().getBaseSymmetry();
  }
  if (sym != null && sym.getSpaceGroup() == null) {
    this.appendLoadNote("Invalid or missing space group operations!");
    sym = null;
  }
  var haveSym = (sym != null);

  if (this.modDim > 0 && haveSym) {
    this.addLatticeVectors();
    this.asc.setTensors();
    this.getModulationReader().setModulation(true, sym);
    this.modr.finalizeModulation();
  }

  if (this.isMagCIF) {
    this.asc.setNoAutoBond();
    if (haveSym) {
      this.addJmolScript("vectors on;vectors 0.15;");
      var momentCount = this.asc.getXSymmetry().setSpinVectors();
      this.appendLoadNote(momentCount + " magnetic moments - use VECTORS ON/OFF or VECTOR MAX x.x or SELECT VXYZ>0");
    }
  }

  if (haveSym && this.auditBlockCode != null && this.auditBlockCode.contains("REFRNCE")) {
    // Keep this symmetry so a later block can look it up by audit code.
    if (this.htAudit == null) {
      this.htAudit = new java.util.Hashtable();
    }
    this.htAudit.put(this.auditBlockCode, sym);
  }
}, "~B");
|
| 336 |
+
/**
 * Handles a data_ header: clears the bond-type list, consumes the peeked
 * token, takes the data set name from the text after "data_" and, when that
 * name is non-empty, starts the next atom set.
 */
Clazz.defineMethod(c$, "processDataParameter",
function () {
  this.bondTypes.clear();
  this.parser.getTokenPeeked();

  // Keys shorter than 6 characters carry no name after "data_".
  var name = "";
  if (this.key.length >= 6) {
    name = this.key.substring(5);
  }
  this.thisDataSetName = name;

  if (this.thisDataSetName.length > 0) {
    this.nextAtomSet();
  }
  if (this.debugging) {
    JU.Logger.debug(this.key);
  }
});
|
| 344 |
+
/**
 * Marks the current model as CIF and either opens a new atom set (when one
 * already exists) or names the collection after the current data set. For
 * mmCIF, the PDB model flag and pdbID are applied both before and after
 * the new atom set is created.
 */
Clazz.defineMethod(c$, "nextAtomSet",
function () {
  this.asc.setCurrentModelInfo("isCIF", Boolean.TRUE);

  if (!(this.asc.iSet >= 0)) {
    this.asc.setCollectionName(this.thisDataSetName);
    return;
  }

  // Applied to the atom set that is being closed ...
  if (this.isMMCIF) {
    this.setModelPDB(true);
    if (this.pdbID != null) {
      this.asc.setCurrentModelInfo("pdbID", this.pdbID);
    }
  }

  this.asc.newAtomSet();

  // ... and again to the atom set just opened.
  if (this.isMMCIF) {
    this.setModelPDB(true);
    if (this.pdbID != null) {
      this.asc.setCurrentModelInfo("pdbID", this.pdbID);
    }
  }
});
|
| 358 |
+
/**
 * Stores the trimmed current data value as the chemical name, structural
 * formula or formula, according to type, and adds it to the load note where
 * applicable. Any other type leaves this.data untouched.
 *
 * @param type "name", "structuralFormula" or "formula"
 * @return this.data after processing
 */
Clazz.defineMethod(c$, "processChemicalInfo",
function (type) {
  if (type.equals("name")) {
    this.data = this.parser.fullTrim(this.data);
    this.chemicalName = this.data;
    this.appendLoadNote(this.chemicalName);
    // "?" is not recorded as the model load note.
    if (!this.data.equals("?")) {
      this.asc.setInfo("modelLoadNote", this.data);
    }
  } else if (type.equals("structuralFormula")) {
    this.data = this.parser.fullTrim(this.data);
    this.thisStructuralFormula = this.data;
  } else if (type.equals("formula")) {
    this.data = this.parser.fullTrim(this.data);
    this.thisFormula = this.data;
    if (this.thisFormula.length > 1) {
      this.appendLoadNote(this.thisFormula);
    }
  }

  if (this.debugging) {
    JU.Logger.debug(type + " = " + this.data);
  }
  return this.data;
}, "~S");
|
| 373 |
+
/**
 * Records the space group name from the current entry, prefixed by its
 * notation ("HM:", "SSG:", "BNS:", "Hall:" or none). An "_ssg_name" key
 * switches the reader to modulated mode and supplies the lattice type;
 * once modulated, other space-group-name keys are ignored.
 */
Clazz.defineMethod(c$, "processSymmetrySpaceGroupName",
function () {
  if (this.key.indexOf("_ssg_name") >= 0) {
    this.modulated = true;
    this.$latticeType = this.data.substring(0, 1);
  } else if (this.modulated) {
    return;
  }

  this.data = this.parser.toUnicode(this.data);

  // The first matching test wins, in this order.
  var prefix;
  if (this.key.indexOf("h-m") > 0) {
    prefix = "HM:";
  } else if (this.modulated) {
    prefix = "SSG:";
  } else if (this.key.indexOf("bns") >= 0) {
    prefix = "BNS:";
  } else if (this.key.indexOf("hall") >= 0) {
    prefix = "Hall:";
  } else {
    prefix = "";
  }

  this.lastSpaceGroupName = prefix + this.data;
  this.setSpaceGroupName(this.lastSpaceGroupName);
});
|
| 383 |
+
/**
 * Builds this.lattvecs either from the pending magnetic centering strings
 * or from a one-letter lattice type (A, B, C, F or I), hands the vectors to
 * the symmetry object, and reports what was added in the load note. Both
 * this.magCenterings (when used) and this.$latticeType are cleared.
 */
Clazz.defineMethod(c$, "addLatticeVectors",
function () {
  this.lattvecs = null;

  if (this.magCenterings != null) {
    this.$latticeType = "Magnetic";
    this.lattvecs = new JU.Lst();
    for (var c = 0; c < this.magCenterings.size(); c++) {
      var text = this.magCenterings.get(c);
      var vec = Clazz.newFloatArray(this.modDim + 4, 0);

      // Strip the x1, x2, ... variable names when that notation is used.
      if (text.indexOf("x1") >= 0) {
        for (var d = 1; d <= this.modDim + 3; d++) {
          text = JU.PT.rep(text, "x" + d, "");
        }
      }

      var tokens = JU.PT.split(JU.PT.replaceAllCharacters(text, "xyz+", ""), ",");
      var nonZero = 0;
      for (var t = 0; t < tokens.length; t++) {
        text = tokens[t].trim();
        if (text.length == 0) continue;
        var value = JU.PT.parseFloatFraction(text);
        vec[t] = value;
        if (value != 0) nonZero++;
      }

      // Only vectors with at least two non-zero components are kept.
      if (nonZero >= 2) this.lattvecs.addLast(vec);
    }
    this.magCenterings = null;
  } else if (this.$latticeType != null && "ABCFI".indexOf(this.$latticeType) >= 0) {
    this.lattvecs = new JU.Lst();
    try {
      this.ms.addLatticeVector(this.lattvecs, this.$latticeType);
    } catch (e) {
      // Exceptions of type Exception are deliberately ignored here;
      // anything else is rethrown.
      if (!Clazz.exceptionOf(e, Exception)) {
        throw e;
      }
    }
  }

  if (this.lattvecs != null && this.lattvecs.size() > 0 && this.asc.getSymmetry().addLatticeVectors(this.lattvecs)) {
    this.appendLoadNote("Note! " + this.lattvecs.size() + " symmetry operators added for lattice centering " + this.$latticeType);
    for (var v = 0; v < this.lattvecs.size(); v++) {
      this.appendLoadNote(JU.PT.toJSON(null, this.lattvecs.get(v)));
    }
  }
  this.$latticeType = null;
});
|
| 420 |
+
/**
 * Looks the current key up in J.api.JmolAdapter.cellParamNames (searching
 * from the last name to the first) and stores the parsed value as that
 * unit cell item. When this.rotateHexCell is set, a value of 120 for
 * item 5 is stored as -1 instead. Unknown keys are ignored.
 */
Clazz.defineMethod(c$, "processCellParameter",
function () {
  var names = J.api.JmolAdapter.cellParamNames;
  for (var idx = names.length - 1; idx >= 0; idx--) {
    if (!this.key.equals(names[idx])) continue;

    var value = this.parseFloatStr(this.data);
    if (this.rotateHexCell && idx == 5 && value == 120) {
      value = -1;
    }
    this.setUnitCellItem(idx, value);
    return;
  }
});
|
| 429 |
+
/**
 * Stores one element of the fractional transform matrix: the first entry
 * of CifReader.TransformFields contained in the key selects unit cell item
 * 6 + index. Values that do not parse to a number are ignored.
 */
Clazz.defineMethod(c$, "processUnitCellTransformMatrix",
function () {
  var value = this.parseFloatStr(this.data);
  if (Float.isNaN(value)) return;

  var fields = J.adapter.readers.cif.CifReader.TransformFields;
  for (var idx = 0; idx < fields.length; idx++) {
    if (this.key.indexOf(fields[idx]) < 0) continue;
    this.setUnitCellItem(6 + idx, value);
    return;
  }
});
|
| 439 |
+
/**
 * Consumes the peeked key and reads the token after it into this.data.
 *
 * @return false when the file ended before a value was found, or when the
 *         value is non-empty and starts with the NUL character; true
 *         otherwise
 */
Clazz.defineMethod(c$, "getData",
function () {
  this.key = this.parser.getTokenPeeked();
  this.data = this.parser.getNextToken();

  if (this.data == null) {
    JU.Logger.warn("CIF ERROR ? end of file; data missing: " + this.key);
    return false;
  }

  var usable = (this.data.length == 0 || this.data.charAt(0) != '\0');
  // Only non-empty usable values are echoed in debug mode.
  if (this.debugging && this.data.length > 0 && usable) {
    JU.Logger.debug(">> " + this.key + " " + this.data);
  }
  return usable;
});
|
| 449 |
+
/**
 * Processes a loop_ block. The first key of the loop decides which handler
 * reads it (modulation reader, atom sites, symmetry operations, citations,
 * atom types, geometric bonds, subclass hook, propagation vectors); a loop
 * nobody claims is skipped.
 */
Clazz.defineMethod(c$, "processLoopBlock",
function () {
  // Consume "loop_" and look at the first column key.
  this.parser.getTokenPeeked();
  this.key = this.parser.peekToken();
  if (this.key == null) return;
  this.key = this.parser.fixKey(this.key0 = this.key);

  // The modulation reader gets the first chance at the loop.
  if (this.modDim > 0) {
    var status = this.getModulationReader().processLoopBlock();
    if (status === -1) {
      // -1: skip the loop, then finish exactly as for 1.
      this.parser.skipLoop(false);
      return;
    }
    if (status === 1) return;
    // 0 (or any other value): continue with the handlers below.
  }

  var isAtomSite = this.key.startsWith("_atom_site");
  var isLigand = !isAtomSite && this.key.equals("_chem_comp_atom_comp_id");
  if (isAtomSite || isLigand) {
    if (!this.processAtomSiteLoopBlock(isLigand)) return;
    if (this.thisDataSetName.equals("global")) {
      this.asc.setCollectionName(this.thisDataSetName = this.chemicalName);
    }
    if (!this.thisDataSetName.equals(this.lastDataSetName)) {
      this.asc.setAtomSetName(this.thisDataSetName);
      this.lastDataSetName = this.thisDataSetName;
    }
    this.asc.setCurrentModelInfo("chemicalName", this.chemicalName);
    this.asc.setCurrentModelInfo("structuralFormula", this.thisStructuralFormula);
    this.asc.setCurrentModelInfo("formula", this.thisFormula);
    return;
  }

  if (this.key.startsWith("_space_group_symop") || this.key.startsWith("_symmetry_equiv_pos") || this.key.startsWith("_symmetry_ssg_equiv")) {
    if (this.ignoreFileSymmetryOperators) {
      JU.Logger.warn("ignoring file-based symmetry operators");
      this.parser.skipLoop(false);
    } else {
      this.processSymmetryOperationsLoopBlock();
    }
    return;
  }

  if (this.key.startsWith("_citation")) {
    this.processCitationListBlock();
    return;
  }

  if (this.key.startsWith("_atom_type")) {
    this.processAtomTypeLoopBlock();
    return;
  }

  // Bond loops are read only for molecular loads or when symmetry is not
  // being applied.
  if ((this.isMolecular || !this.doApplySymmetry) && this.key.startsWith("_geom_bond")) {
    this.processGeomBondLoopBlock();
    return;
  }

  if (this.processSubclassLoopBlock()) return;

  if (this.key.equals("_propagation_vector_seq_id")) {
    this.addMore();
    return;
  }

  this.parser.skipLoop(false);
});
|
| 496 |
+
/**
 * Hook consulted by processLoopBlock before it falls back to its last cases.
 * This implementation claims nothing.
 *
 * @return always false here
 */
Clazz.defineMethod (c$, "processSubclassLoopBlock",
function () {
  var handled = false;
  return handled;
});
|
| 500 |
+
/**
 * Read a loop generically: count its data names (tokens starting with '_'),
 * then join each row of that many data tokens into one string and pass the
 * trimmed string to appendUunitCellInfo.
 *
 * Within a row the first value is followed by "=" and every other value by
 * a space. Anything Clazz.exceptionOf reports as an Exception is swallowed;
 * other throwables propagate.
 */
Clazz.defineMethod (c$, "addMore",
function () {
  var token;
  var nKeys = 0;
  try {
    // Consume the column headers.
    for (token = this.parser.peekToken (); token != null && token.charAt (0) == '_'; token = this.parser.peekToken ()) {
      this.parser.getTokenPeeked ();
      nKeys++;
    }
    var nValues = 0;
    var entry = "";
    for (token = this.parser.getNextDataToken (); token != null; token = this.parser.getNextDataToken ()) {
      var separator = (nValues % nKeys == 0 ? "=" : " ");
      entry += token + separator;
      nValues++;
      if (nValues % nKeys == 0) {
        // A full row has been collected.
        this.appendUunitCellInfo (entry.trim ());
        entry = "";
      }
    }
  } catch (e) {
    if (!Clazz.exceptionOf (e, Exception)) throw e;
  }
});
|
| 524 |
+
/**
 * Load column i of the current row into this.field / this.firstChar and
 * return the key index mapped to that column.
 *
 * @param i column index; a value that is not >= 0 yields -1 without
 *          touching this.field
 * @return this.col2key[i], or -1 when the column is empty or its first
 *         character is '\0'
 */
Clazz.defineMethod (c$, "fieldProperty",
function (i) {
  if (!(i >= 0)) return -1;
  this.field = this.parser.getColumnData (i);
  if (!(this.field.length > 0)) return -1;
  this.firstChar = this.field.charAt (0);
  if (!(this.firstChar != '\0')) return -1;
  return this.col2key[i];
}, "~N");
|
| 528 |
+
/**
 * Have the parser fill this.key2col and this.col2key for the given list of
 * field names.
 *
 * @param fields array of data names to look for
 */
Clazz.defineMethod (c$, "parseLoopParameters",
function (fields) {
  // Inside a loop no single key is passed; otherwise the unfixed key is.
  var singleKey = (this.isLoop ? null : this.key0);
  this.parser.parseDataBlockParameters (fields, singleKey, this.data, this.key2col, this.col2key);
}, "~A");
|
| 532 |
+
/**
 * Expand abbreviated field names, then parse the loop parameters.
 *
 * If the first entry of fields starts with '*', every entry starting with
 * '*' has that character replaced by key. The array is modified in place,
 * so a later call with the same array finds nothing left to expand.
 *
 * @param key    prefix substituted for a leading '*'
 * @param fields array of (possibly abbreviated) data names
 */
Clazz.defineMethod (c$, "parseLoopParametersFor",
function (key, fields) {
  if (fields[0].charAt (0) == '*') {
    var n = fields.length;
    while (--n >= 0) {
      var name = fields[n];
      if (name.charAt (0) == '*') fields[n] = key + name.substring (1);
    }
  }
  this.parseLoopParameters (fields);
}, "~S,~A");
|
| 538 |
+
/**
 * Stop the column currently mapped to the given field from being reported
 * by fieldProperty, by resetting its col2key entry to -1.
 *
 * @param fieldIndex index into this.key2col
 */
Clazz.defineMethod (c$, "disableField",
function (fieldIndex) {
  var col = this.key2col[fieldIndex];
  if (col == -1) return;
  this.col2key[col] = -1;
}, "~N");
|
| 543 |
+
/**
 * Read an _atom_type loop into this.htOxStates, mapping
 * _atom_type_symbol (field 0) to _atom_type_oxidation_number (field 1).
 *
 * The loop is skipped entirely unless both fields are present. Rows with a
 * null symbol or a non-numeric oxidation number are ignored. The table is
 * created on first use.
 */
Clazz.defineMethod (c$, "processAtomTypeLoopBlock",
function () {
  var fields = J.adapter.readers.cif.CifReader.atomTypeFields;
  this.parseLoopParameters (fields);
  if (!this.checkAllFieldsPresent (fields, -1, false)) {
    this.parser.skipLoop (false);
    return;
  }
  while (this.parser.getData ()) {
    var symbol = this.getField (0);
    if (this.isNull (symbol)) continue;
    var oxNumber = this.parseFloatStr (this.getField (1));
    if (Float.isNaN (oxNumber)) continue;
    if (this.htOxStates == null) this.htOxStates = new java.util.Hashtable ();
    this.htOxStates.put (symbol, Float.$valueOf (oxNumber));
  }
});
|
| 557 |
+
/**
 * Read an atom loop (CifReader.atomFields, with "_atom_site" substituted for
 * a leading '*') and add one atom per accepted row to this.asc.
 *
 * Which coordinate columns are used is decided once, in this order:
 * field 55; else Cartesian field 6 or field 52 (fractional fields 3-5 are
 * then disabled); else fractional field 3 (Cartesian fields 6-8 are then
 * disabled); else, when only fields 20, 21 or 63 are present, rows update
 * atoms that already exist (looked up by name) instead of creating new ones.
 * With none of these present the loop is skipped.
 *
 * Numeric case labels are indices into atomFields; the names given in
 * comments are those visible in the class statics (indices 0-44). Indices
 * above that are not named here.
 *
 * @param isLigand stored in this.isLigand
 * @return false when the loop was skipped for lack of usable columns,
 *         true otherwise
 */
Clazz.defineMethod (c$, "processAtomSiteLoopBlock",
function (isLigand) {
  this.isLigand = isLigand;
  var pdbModelNo = -1;
  var haveCoord = true;
  this.parseLoopParametersFor ("_atom_site", J.adapter.readers.cif.CifReader.atomFields);

  // Decide which coordinate set this loop provides.
  if (this.key2col[55] != -1) {
    this.setFractionalCoordinates (false);
  } else if (this.key2col[6] != -1 || this.key2col[52] != -1) {
    this.setFractionalCoordinates (false);
    this.disableField (3);
    this.disableField (4);
    this.disableField (5);
    // A GROUP_PDB column marks the file as mmCIF/PDB-like.
    if (this.key2col[16] != -1 && !this.isMMCIF) {
      this.setIsPDB ();
      this.isMMCIF = true;
    }
  } else if (this.key2col[3] != -1) {
    this.setFractionalCoordinates (true);
    this.disableField (6);
    this.disableField (7);
    this.disableField (8);
  } else if (this.key2col[20] != -1 || this.key2col[21] != -1 || this.key2col[63] != -1) {
    // No coordinates: rows refer to atoms that were read earlier.
    haveCoord = false;
  } else {
    this.parser.skipLoop (false);
    return false;
  }

  var modelField = this.key2col[17];
  // Not reset per row: keeps the last value read from field 61.
  var siteMult = 0;

  while (this.parser.getData ()) {
    if (modelField >= 0) {
      pdbModelNo = this.checkPDBModelField (modelField, pdbModelNo);
      if (pdbModelNo < 0) break;
      if (this.skipping) continue;
    }

    // Obtain the atom this row applies to.
    var atom = null;
    if (haveCoord) {
      atom = new J.adapter.smarter.Atom ();
    } else {
      var haveLabel = (this.fieldProperty (this.key2col[20]) != -1 || this.fieldProperty (this.key2col[21]) != -1 || this.fieldProperty (this.key2col[63]) != -1);
      if (!haveLabel) continue;
      atom = this.asc.getAtomFromName (this.field);
      if (atom == null) continue;
    }

    var componentId = null;
    var strChain = null;
    var id = null;
    var seqID = 0;
    var nColumns = this.parser.getColumnCount ();

    for (var i = 0; i < nColumns; ++i) {
      var tok = this.fieldProperty (i);
      switch (tok) {
      case -1:
        break;
      case 71:
        seqID = this.parseIntStr (this.field);
        break;
      case 70:
        id = this.field;
        break;
      case 50:
      case 0: // TYPE_SYMBOL
        var elementSymbol;
        if (this.field.length < 2) {
          elementSymbol = this.field;
        } else {
          var ch1 = Character.toLowerCase (this.field.charAt (1));
          if (J.adapter.smarter.Atom.isValidSym2 (this.firstChar, ch1)) {
            elementSymbol = "" + this.firstChar + ch1;
          } else {
            elementSymbol = "" + this.firstChar;
            if (!this.haveHAtoms && this.firstChar == 'H') this.haveHAtoms = true;
          }
        }
        atom.elementSymbol = elementSymbol;
        // Apply an oxidation state recorded by processAtomTypeLoopBlock, if any.
        if (this.htOxStates != null && this.htOxStates.containsKey (this.field)) {
          var charge = this.htOxStates.get (this.field).floatValue ();
          atom.formalCharge = Math.round (charge);
          if (Math.abs (atom.formalCharge - charge) > 0.1) {
            if (this.debugging) {
              JU.Logger.debug ("CIF charge on " + this.field + " was " + charge + "; rounded to " + atom.formalCharge);
            }
          }
        }
        break;
      case 49:
      case 1: // LABEL
      case 2: // AUTH_ATOM
        atom.atomName = this.field;
        break;
      case 55:
        // Fields 55-57 are only applied when this.readIdeal is set.
        var x = this.parseFloatStr (this.field);
        if (this.readIdeal && !Float.isNaN (x)) atom.x = x;
        break;
      case 56:
        var y = this.parseFloatStr (this.field);
        if (this.readIdeal && !Float.isNaN (y)) atom.y = y;
        break;
      case 57:
        var z = this.parseFloatStr (this.field);
        if (this.readIdeal && !Float.isNaN (z)) atom.z = z;
        break;
      case 52:
      case 6: // CARTN_X
      case 3: // FRACT_X
        atom.x = this.parseFloatStr (this.field);
        break;
      case 53:
      case 7: // CARTN_Y
      case 4: // FRACT_Y
        atom.y = this.parseFloatStr (this.field);
        break;
      case 54:
      case 8: // CARTN_Z
      case 5: // FRACT_Z
        atom.z = this.parseFloatStr (this.field);
        break;
      case 51:
        atom.formalCharge = this.parseIntStr (this.field);
        break;
      case 9: // OCCUPANCY
        var floatOccupancy = this.parseFloatStr (this.field);
        if (!Float.isNaN (floatOccupancy)) atom.foccupancy = floatOccupancy;
        break;
      case 10: // B_ISO
        atom.bfactor = this.parseFloatStr (this.field) * (this.isMMCIF ? 1 : 100);
        break;
      case 48:
      case 11: // COMP_ID
        atom.group3 = this.field;
        break;
      case 59:
        componentId = this.field;
        if (!this.useAuthorChainID) this.setChainID (atom, strChain = this.field);
        break;
      case 12: // AUTH_ASYM_ID
        if (this.useAuthorChainID) this.setChainID (atom, strChain = this.field);
        break;
      case 13: // AUTH_SEQ_ID
        this.maxSerial = Math.max (this.maxSerial, atom.sequenceNumber = this.parseIntStr (this.field));
        break;
      case 14: // INS_CODE
        atom.insertionCode = this.firstChar;
        break;
      case 15: // ALT_ID
      case 60:
        atom.altLoc = this.firstChar;
        break;
      case 58:
        this.disorderAssembly = this.field;
        break;
      case 19: // DISORDER_GROUP
        // A leading '-' takes the following character as altLoc and
        // flags the atom with ignoreSymmetry.
        if (this.firstChar == '-' && this.field.length > 1) {
          atom.altLoc = this.field.charAt (1);
          atom.ignoreSymmetry = true;
        } else {
          atom.altLoc = this.firstChar;
        }
        break;
      case 16: // GROUP_PDB
        if ("HETATM".equals (this.field)) atom.isHetero = true;
        break;
      case 18: // DUMMY_ATOM
        if ("dum".equals (this.field)) {
          // NaN x makes the coordinate check after this loop reject the atom.
          atom.x = NaN;
          continue;
        }
        break;
      case 61:
        if (this.modulated) siteMult = this.parseIntStr (this.field);
        break;
      case 62:
      case 47:
        if (this.field.equalsIgnoreCase ("Uiso")) {
          var j = this.key2col[34]; // U_ISO_OR_EQUIV
          if (j != -1) this.asc.setU (atom, 7, this.parseFloatStr (this.parser.getColumnData (j)));
        }
        break;
      case 22: // ANISO_U11
      case 23: // ANISO_U22
      case 24: // ANISO_U33
      case 25: // ANISO_U12
      case 26: // ANISO_U13
      case 27: // ANISO_U23
      case 28: // ANISO_MMCIF_U11
      case 29: // ANISO_MMCIF_U22
      case 30: // ANISO_MMCIF_U33
      case 31: // ANISO_MMCIF_U12
      case 32: // ANISO_MMCIF_U13
      case 33: // ANISO_MMCIF_U23
        this.asc.setU (atom, (this.col2key[i] - 22) % 6, this.parseFloatStr (this.field));
        break;
      case 35: // ANISO_B11
      case 36: // ANISO_B22
      case 37: // ANISO_B33
      case 38: // ANISO_B12
      case 39: // ANISO_B13
      case 40: // ANISO_B23
        // Slot 6 is set to 4 for the B form and to 0 for the beta form below.
        this.asc.setU (atom, 6, 4);
        this.asc.setU (atom, (this.col2key[i] - 35) % 6, this.parseFloatStr (this.field));
        break;
      case 41: // ANISO_BETA_11
      case 42: // ANISO_BETA_22
      case 43: // ANISO_BETA_33
      case 44: // ANISO_BETA_12
      case 45:
      case 46:
        this.asc.setU (atom, 6, 0);
        this.asc.setU (atom, (this.col2key[i] - 41) % 6, this.parseFloatStr (this.field));
        break;
      case 64:
      case 65:
      case 66:
      case 67:
      case 68:
      case 69:
        // Vector components stored on atom.vib; seeing any of them marks
        // the file as magCIF.
        this.isMagCIF = true;
        var moment = atom.vib;
        if (moment == null) atom.vib = moment = new JU.Vibration ().setType (-2);
        var v = this.parseFloatStr (this.field);
        switch (tok) {
        case 64:
        case 67:
          moment.x = v;
          this.appendLoadNote ("magnetic moment: " + this.line);
          break;
        case 65:
        case 68:
          moment.y = v;
          break;
        case 66:
        case 69:
          moment.z = v;
          break;
        }
        break;
      }
    }

    // Rows that only updated an existing atom are done here.
    if (!haveCoord) continue;
    if (Float.isNaN (atom.x) || Float.isNaN (atom.y) || Float.isNaN (atom.z)) {
      JU.Logger.warn ("atom " + atom.atomName + " has invalid/unknown coordinates");
      continue;
    }
    if (atom.elementSymbol == null && atom.atomName != null) atom.getElementSymbol ();
    if (!this.filterCIFAtom (atom, componentId)) continue;
    this.setAtomCoord (atom);
    if (this.isMMCIF && !this.processSubclassAtom (atom, componentId, strChain)) continue;
    if (this.asc.iSet < 0) this.nextAtomSet ();
    this.asc.addAtomWithMappedName (atom);
    if (id != null) {
      this.asc.atomSymbolicMap.put (id, atom);
      if (seqID > 0) {
        var seqVec = atom.vib;
        if (seqVec == null) seqVec = this.asc.addVibrationVector (atom.index, 0, NaN, 1094713365);
        seqVec.x = seqID;
      }
    }
    this.ac++;
    if (this.modDim > 0 && siteMult != 0) atom.vib = JU.V3.new3 (siteMult, 0, NaN);
  }

  this.asc.setCurrentModelInfo ("isCIF", Boolean.TRUE);
  if (this.isMMCIF) this.setModelPDB (true);
  if (this.isMMCIF && this.skipping) this.skipping = false;
  return true;
}, "~B");
|
| 807 |
+
/**
 * Hook called by processAtomSiteLoopBlock for each row when a model-number
 * column is present. A negative result ends the atom loop there.
 *
 * @param modelField     column index of the model number
 * @param currentModelNo result of the previous call (-1 initially)
 * @return always 0 in this implementation
 */
Clazz.defineMethod (c$, "checkPDBModelField",
function (modelField, currentModelNo) {
  var modelNo = 0;
  return modelNo;
}, "~N,~N");
|
| 811 |
+
/**
 * Hook called by processAtomSiteLoopBlock for each accepted atom when
 * this.isMMCIF is set; a false result drops the atom.
 *
 * @param atom       the atom about to be added
 * @param assemblyId component id read from the row, or null
 * @param strChain   chain id read from the row, or null
 * @return always true in this implementation
 */
Clazz.defineMethod (c$, "processSubclassAtom",
function (atom, assemblyId, strChain) {
  var keepAtom = true;
  return keepAtom;
}, "J.adapter.smarter.Atom,~S,~S");
|
| 815 |
+
/**
 * Decide whether an atom read from the file should be kept.
 *
 * The atom is rejected by the general atom filter, by the assembly filter
 * (when this.filterAssembly is set), or, when this.configurationPtr > 0,
 * by the alternate-location bookkeeping below: each new altLoc value seen
 * within a disorder assembly decrements this.conformationIndex, and atoms
 * with an altLoc are kept only while that counter is exactly 0.
 *
 * @param atom        the candidate atom
 * @param componentId component id passed to the assembly filter
 * @return true to keep the atom
 */
Clazz.defineMethod (c$, "filterCIFAtom",
function (atom, componentId) {
  if (!this.filterAtom (atom, -1)) return false;
  if (this.filterAssembly && this.filterReject (this.filter, "$", componentId)) return false;
  if (!(this.configurationPtr > 0)) return true;

  // Restart the countdown whenever the disorder assembly changes.
  if (!this.disorderAssembly.equals (this.lastDisorderAssembly)) {
    this.lastDisorderAssembly = this.disorderAssembly;
    this.lastAltLoc = '\0';
    this.conformationIndex = this.configurationPtr;
  }

  // Atoms without an alternate location are always kept.
  if (!(atom.altLoc != '\0')) return true;

  if (this.conformationIndex >= 0 && atom.altLoc != this.lastAltLoc) {
    this.lastAltLoc = atom.altLoc;
    this.conformationIndex--;
  }
  if (this.conformationIndex != 0) {
    JU.Logger.info ("ignoring " + atom.atomName);
    return false;
  }
  return true;
}, "J.adapter.smarter.Atom,~S");
|
| 833 |
+
/**
 * Read a citation loop and append a "TITLE: ..." load note for every row
 * whose field 0 (of CifReader.citationFields) is not null.
 */
Clazz.defineMethod (c$, "processCitationListBlock",
function () {
  this.parseLoopParameters (J.adapter.readers.cif.CifReader.citationFields);
  while (this.parser.getData ()) {
    var title = this.getField (0);
    if (this.isNull (title)) continue;
    this.appendLoadNote ("TITLE: " + this.parser.toUnicode (title));
  }
});
|
| 841 |
+
/**
 * Read a symmetry operator loop (CifReader.symmetryOperationsFields, with
 * "_space_group_symop" substituted for a leading '*').
 *
 * Each accepted operator string is appended to this.symops and passed to
 * setSymmetryOperator. Fields 9-11 are collected into this.magCenterings.
 * The loop is skipped with a warning when none of the known fields is
 * present. The case labels are indices into symmetryOperationsFields and
 * rely on fall-through: 5 -> 2/3 -> 0/4/1.
 */
Clazz.defineMethod (c$, "processSymmetryOperationsLoopBlock",
function () {
  var fields = J.adapter.readers.cif.CifReader.symmetryOperationsFields;
  this.parseLoopParametersFor ("_space_group_symop", fields);
  this.symops = new JU.Lst ();

  // At least one known field must map to a column.
  var haveKey = false;
  for (var k = fields.length; --k >= 0; ) {
    if (this.key2col[k] != -1) {
      haveKey = true;
      break;
    }
  }
  if (!haveKey) {
    JU.Logger.warn ("required _space_group_symop key not found");
    this.parser.skipLoop (false);
    return;
  }

  // Counts operator fields seen; only consulted when rotations are not
  // allowed and there is no time reversal, in which case only the first counts.
  var nOps = 0;
  var isMag = false;
  while (this.parser.getData ()) {
    var ssgop = false;
    var nColumns = this.parser.getColumnCount ();

    // Time reversal from the first usable one of fields 7, 8, 6 (in that
    // order): "-1" gives -1, any other value gives 1, none gives 0.
    var timeRev;
    if (this.fieldProperty (this.key2col[7]) == -1 && this.fieldProperty (this.key2col[8]) == -1 && this.fieldProperty (this.key2col[6]) == -1) {
      timeRev = 0;
    } else {
      timeRev = (this.field.equals ("-1") ? -1 : 1);
    }

    for (var i = 0, tok; i < nColumns; ++i) {
      switch (tok = this.fieldProperty (i)) {
      case 5:
        if (this.field.indexOf ('~') >= 0) this.field = JU.PT.rep (this.field, "~", "");
        // fall through
      case 2:
      case 3:
        this.modulated = true;
        ssgop = true;
        // fall through
      case 0:
      case 4:
      case 1:
        if (this.allowRotations || timeRev != 0 || ++nOps == 1) {
          if (!this.modulated || ssgop) {
            if (tok == 1 || tok == 3) {
              // These fields may carry the time reversal as a trailing
              // ",+1" / ",1" / ",-1", which is stripped here.
              isMag = true;
              timeRev = (this.field.endsWith (",+1") || this.field.endsWith (",1") ? 1 : this.field.endsWith (",-1") ? -1 : 0);
              if (timeRev != 0) this.field = this.field.substring (0, this.field.lastIndexOf (','));
            }
            if (timeRev != 0) this.field += "," + (timeRev == 1 ? "m" : "-m");
            this.field = this.field.$replace (';', ' ');
            this.symops.addLast (this.field);
            this.setSymmetryOperator (this.field);
          }
        }
        break;
      case 9:
      case 10:
      case 11:
        isMag = true;
        if (this.magCenterings == null) this.magCenterings = new JU.Lst ();
        this.magCenterings.addLast (this.field);
        break;
      }
    }
  }
  if (this.ms != null && !isMag) this.addLatticeVectors ();
});
|
| 891 |
+
/**
 * Translate a CIF bond-type string into a numeric bond order using the
 * first character, case-insensitively.
 *
 * 'S' or '\0' -> 1, 'D' -> 2, 'T' -> 3, 'Q' -> 4, 'A' -> 515 (and
 * this.haveAromatic is set). Anything else logs a warning and yields 1.
 *
 * @param field the bond-type text
 * @return the bond order code
 */
Clazz.defineMethod (c$, "getBondOrder",
function (field) {
  var code = field.toUpperCase ().charAt (0);
  switch (code) {
  case 'D':
    return 2;
  case 'T':
    return 3;
  case 'Q':
    return 4;
  case 'A':
    this.haveAromatic = true;
    return 515;
  case '\0':
  case 'S':
    return 1;
  }
  JU.Logger.warn ("unknown CIF bond order: " + field);
  return 1;
}, "~S");
|
| 910 |
+
/**
 * Read a _geom_bond loop (CifReader.geomBondFields).
 *
 * Fields 0 and 1 are the two atom labels, field 2 the distance text and
 * field 3 the bond type. Rows with a usable distance are queued in
 * this.bondTypes as [name1, name2, distance, uncertainty, order] for
 * createBonds. Rows with a zero or non-numeric distance are bonded directly
 * by name when the coordinates are not fractional, and otherwise dropped.
 *
 * The loop is skipped when fields 0 or 1 are missing, or when
 * this.stateScriptVersionInt falls in one of the two version ranges tested
 * below.
 *
 * Fix: each label is now resolved on its own (as given, then upper-cased).
 * The previous combined condition dereferenced a still-null name2 when
 * label 1 was only found after upper-casing, throwing instead of reading
 * the bond.
 */
Clazz.defineMethod (c$, "processGeomBondLoopBlock",
function () {
  var bondLoopBug = (this.stateScriptVersionInt >= 130304 && this.stateScriptVersionInt < 140403 || this.stateScriptVersionInt >= 150000 && this.stateScriptVersionInt < 150403);
  this.parseLoopParameters (J.adapter.readers.cif.CifReader.geomBondFields);
  if (bondLoopBug || !this.checkAllFieldsPresent (J.adapter.readers.cif.CifReader.geomBondFields, 2, true)) {
    this.parser.skipLoop (false);
    return;
  }
  var bondCount = 0;
  var name1;
  var name2;
  while (this.parser.getData ()) {
    // Bonds whose atoms are not in the file (even upper-cased) are ignored.
    name1 = this.getField (0);
    if (this.asc.getAtomIndex (name1) < 0 && this.asc.getAtomIndex (name1 = name1.toUpperCase ()) < 0) continue;
    name2 = this.getField (1);
    if (this.asc.getAtomIndex (name2) < 0 && this.asc.getAtomIndex (name2 = name2.toUpperCase ()) < 0) continue;

    var order = this.getBondOrder (this.getField (3));
    var sdist = this.getField (2);
    var distance = this.parseFloatStr (sdist);
    if (distance == 0 || Float.isNaN (distance)) {
      // No usable distance: bond by name, but only for non-fractional coordinates.
      if (!this.iHaveFractionalCoordinates) {
        var a = this.asc.getAtomFromName (name1);
        var b = this.asc.getAtomFromName (name2);
        if (a != null && b != null) this.asc.addNewBondWithOrder (a.index, b.index, order);
      }
      continue;
    }

    // Uncertainty: "d.ddd(u)" is expanded by writing the digits of u,
    // right-aligned and zero-padded, over the digits of the distance and
    // parsing the result; 0.015 is used when absent or unparsable.
    var dx = 0;
    var pt = sdist.indexOf ('(');
    if (pt >= 0) {
      var data = sdist.toCharArray ();
      var sdx = sdist.substring (pt + 1, sdist.length - 1);
      var n = sdx.length;
      for (var j = pt; --j >= 0; ) {
        if (data[j] == '.' && --j < 0) break;
        data[j] = (--n < 0 ? '0' : sdx.charAt (n));
      }
      dx = this.parseFloatStr (String.valueOf (data));
      if (Float.isNaN (dx)) {
        JU.Logger.info ("error reading uncertainty for " + this.line);
        dx = 0.015;
      }
    } else {
      dx = 0.015;
    }
    bondCount++;
    this.bondTypes.addLast ( Clazz.newArray (-1, [name1, name2, Float.$valueOf (distance), Float.$valueOf (dx), Integer.$valueOf (order)]));
  }
  if (bondCount > 0) {
    JU.Logger.info (bondCount + " bonds read");
    if (!this.doApplySymmetry) {
      this.isMolecular = true;
      this.forceSymmetry (false);
    }
  }
});
|
| 958 |
+
/**
 * Create bonds for the last atom set from the queued GEOM_BOND records and,
 * for molecular models, restrict the model to the atoms found to belong to
 * the molecule.
 *
 * Working state (bsSets, atomRadius, bsConnected, bsMolecule, bsExclude) is
 * built here, used by createBonds, and released again before returning;
 * this.bondTypes is cleared.
 *
 * Indexing: bsSets is relative to this.firstAtom; atomRadius, bsConnected,
 * bsMolecule and bsExclude use absolute atom indices.
 */
Clazz.defineMethod (c$, "setBondingAndMolecules",
function () {
  this.atoms = this.asc.atoms;
  this.firstAtom = this.asc.getLastAtomSetAtomIndex ();
  var nat = this.asc.getLastAtomSetAtomCount ();
  this.ac = this.firstAtom + nat;
  var usingBonds = (this.bondTypes.size () > 0 ? " using GEOM_BOND records" : "");
  JU.Logger.info ("CIF creating molecule for " + nat + " atoms " + usingBonds);
  this.bsSets = new Array (nat);
  this.symmetry = this.asc.getSymmetry ();

  // Group atoms by name: the slot of the atom that the name maps to
  // collects every atom of the set carrying that name.
  var i;
  for (i = this.firstAtom; i < this.ac; i++) {
    var ipt = this.asc.getAtomFromName (this.atoms[i].atomName).index - this.firstAtom;
    if (this.bsSets[ipt] == null) this.bsSets[ipt] = new JU.BS ();
    this.bsSets[ipt].set (i - this.firstAtom);
  }

  if (this.isMolecular) {
    this.atomRadius = Clazz.newFloatArray (this.ac, 0);
    for (i = this.firstAtom; i < this.ac; i++) {
      var elemnoWithIsotope = J.api.JmolAdapter.getElementNumber (this.atoms[i].getElementSymbol ());
      this.atoms[i].elementNumber = elemnoWithIsotope;
      // -2147483648 (the minimum 32-bit int) is treated as charge 0.
      var charge = (this.atoms[i].formalCharge == -2147483648 ? 0 : this.atoms[i].formalCharge);
      if (elemnoWithIsotope > 0) this.atomRadius[i] = J.api.JmolAdapter.getBondingRadius (elemnoWithIsotope, charge);
    }
    this.bsConnected = new Array (this.ac);
    for (i = this.firstAtom; i < this.ac; i++) {
      this.bsConnected[i] = new JU.BS ();
    }
    this.bsMolecule = new JU.BS ();
    this.bsExclude = new JU.BS ();
  }

  // createBonds is repeated until it reports that nothing more was added;
  // only the first pass runs with doInit == true.
  this.bsBondDuplicates = new JU.BS ();
  for (var isFirst = true; this.createBonds (isFirst); isFirst = false) {
  }

  if (this.isMolecular && this.iHaveFractionalCoordinates && !this.bsMolecule.isEmpty ()) {
    if (this.asc.bsAtoms == null) this.asc.bsAtoms = new JU.BS ();
    this.asc.bsAtoms.clearBits (this.firstAtom, this.ac);
    this.asc.bsAtoms.or (this.bsMolecule);
    this.asc.bsAtoms.andNot (this.bsExclude);
    for (i = this.firstAtom; i < this.ac; i++) {
      if (this.asc.bsAtoms.get (i)) {
        this.symmetry.toCartesian (this.atoms[i], true);
      } else if (this.debugging) {
        JU.Logger.debug (this.molecularType + " removing " + i + " " + this.atoms[i].atomName + " " + this.atoms[i]);
      }
    }
    this.asc.setCurrentModelInfo ("unitCellParams", null);
    if (this.nMolecular++ == this.asc.iSet) {
      this.asc.clearGlobalBoolean (0);
      this.asc.clearGlobalBoolean (1);
      this.asc.clearGlobalBoolean (2);
    }
  }

  if (this.bondTypes.size () > 0) this.asc.setCurrentModelInfo ("hasBonds", Boolean.TRUE);

  // Release per-model working state.
  this.bondTypes.clear ();
  this.atomRadius = null;
  this.bsSets = null;
  this.bsConnected = null;
  this.bsMolecule = null;
  this.bsExclude = null;
});
|
| 1012 |
+
/**
 * Copy the position of atom i into pt and, when the coordinates are
 * fractional, convert pt to Cartesian. The atom itself is not modified.
 *
 * @param pt receives the position
 * @param i  absolute atom index into this.atoms
 */
Clazz.defineMethod (c$, "fixAtomForBonding",
function (pt, i) {
  var source = this.atoms[i];
  pt.setT (source);
  if (!this.iHaveFractionalCoordinates) return;
  this.symmetry.toCartesian (pt, true);
}, "JU.P3,~N");
|
| 1017 |
+
/**
 * One pass of bond creation for the current atom set; called repeatedly by
 * setBondingAndMolecules until it returns false.
 *
 * Pass outline:
 *  1. For every queued GEOM_BOND record not flagged as a duplicate, bond all
 *     pairs of same-named atoms whose distance matches the record.
 *  2. With fractional coordinates, if bond records exist but none involves
 *     an atom with elementNumber 1, bond such atoms to any other atom that
 *     checkBondDistance accepts for (1.1, 0).
 *  3. For molecular models, grow this.bsMolecule: on the first pass seed it
 *     from atoms whose atomSite points at themselves, then look for one atom
 *     outside the molecule that lies within bonding range of it, shift that
 *     atom's connected branch by this.ptOffset and add it.
 *
 * Fix: the "already connected" test in step 1 now uses the absolute index
 * (k + firstAtom). addNewBond records absolute indices in bsConnected, so
 * testing the model-relative k read the wrong bit whenever firstAtom > 0.
 *
 * @param doInit true on the first pass only; enables duplicate detection
 *               and seeding of bsMolecule
 * @return true when a branch was added to the molecule and another pass is
 *         needed; false otherwise
 */
Clazz.defineMethod (c$, "createBonds",
function (doInit) {
  var list = "";
  var haveH = false;

  // Step 1: explicit GEOM_BOND records.
  for (var i = this.bondTypes.size (); --i >= 0; ) {
    if (this.bsBondDuplicates.get (i)) continue;
    var o = this.bondTypes.get (i);
    var distance = (o[2]).floatValue ();
    var dx = (o[3]).floatValue ();
    var order = (o[4]).intValue ();
    var iatom1 = this.asc.getAtomIndex (o[0]);
    var iatom2 = this.asc.getAtomIndex (o[1]);
    if (doInit) {
      // Records with the same atom pair and distance are flagged once and
      // skipped on this and later passes.
      var key = ";" + iatom1 + ";" + iatom2 + ";" + distance;
      if (list.indexOf (key) >= 0) {
        this.bsBondDuplicates.set (i);
        continue;
      }
      list += key;
    }
    var bs1 = this.bsSets[iatom1 - this.firstAtom];
    var bs2 = this.bsSets[iatom2 - this.firstAtom];
    if (bs1 == null || bs2 == null) continue;
    if (this.atoms[iatom1].elementNumber == 1 || this.atoms[iatom2].elementNumber == 1) haveH = true;
    // bs1/bs2 hold indices relative to firstAtom.
    for (var j = bs1.nextSetBit (0); j >= 0; j = bs1.nextSetBit (j + 1)) {
      for (var k = bs2.nextSetBit (0); k >= 0; k = bs2.nextSetBit (k + 1)) {
        if ((!this.isMolecular || !this.bsConnected[j + this.firstAtom].get (k + this.firstAtom)) && this.checkBondDistance (this.atoms[j + this.firstAtom], this.atoms[k + this.firstAtom], distance, dx)) this.addNewBond (j + this.firstAtom, k + this.firstAtom, order);
      }
    }
  }

  if (!this.iHaveFractionalCoordinates) return false;

  // Step 2: the records listed no bond to an element-1 atom; add them by distance.
  if (this.bondTypes.size () > 0 && !haveH) {
    for (var i = this.firstAtom; i < this.ac; i++) {
      if (this.atoms[i].elementNumber != 1) continue;
      var checkAltLoc = (this.atoms[i].altLoc != '\0');
      for (var k = this.firstAtom; k < this.ac; k++) {
        if (k != i && this.atoms[k].elementNumber != 1 && (!checkAltLoc || this.atoms[k].altLoc == '\0' || this.atoms[k].altLoc == this.atoms[i].altLoc)) {
          if (!this.bsConnected[i].get (k) && this.checkBondDistance (this.atoms[i], this.atoms[k], 1.1, 0)) this.addNewBond (i, k, 1);
        }
      }
    }
  }

  if (!this.isMolecular) return false;

  // Step 3a: seed the molecule on the first pass.
  if (doInit) {
    for (var i = this.firstAtom; i < this.ac; i++) {
      if (this.atoms[i].atomSite + this.firstAtom == i && !this.bsMolecule.get (i)) this.setBs (this.atoms, i, this.bsConnected, this.bsMolecule);
    }
  }

  // Step 3b: attach one more branch, if any atom outside the molecule is
  // within (radius_i + radius_j + bondTolerance) of an atom inside it.
  var bondTolerance = this.vwr.getFloat (570425348);
  var bsBranch = new JU.BS ();
  var cart1 = new JU.P3 ();
  var cart2 = new JU.P3 ();
  var nFactor = 2;
  for (var i = this.firstAtom; i < this.ac; i++) {
    if (this.bsMolecule.get (i) || this.bsExclude.get (i)) continue;
    for (var j = this.bsMolecule.nextSetBit (0); j >= 0; j = this.bsMolecule.nextSetBit (j + 1)) {
      if (!this.symmetry.checkDistance (this.atoms[j], this.atoms[i], this.atomRadius[i] + this.atomRadius[j] + bondTolerance, 0, nFactor, nFactor, nFactor, this.ptOffset)) continue;
      this.setBs (this.atoms, i, this.bsConnected, bsBranch);
      for (var k = bsBranch.nextSetBit (0); k >= 0; k = bsBranch.nextSetBit (k + 1)) {
        this.atoms[k].add (this.ptOffset);
        this.fixAtomForBonding (cart1, k);
        // Exclude the moved atom if it now lies within 0.1 of another
        // atom carrying the same name.
        var bs = this.bsSets[this.asc.getAtomIndex (this.atoms[k].atomName) - this.firstAtom];
        if (bs != null) {
          for (var ii = bs.nextSetBit (0); ii >= 0; ii = bs.nextSetBit (ii + 1)) {
            if (ii + this.firstAtom == k) continue;
            this.fixAtomForBonding (cart2, ii + this.firstAtom);
            if (cart2.distance (cart1) < 0.1) {
              this.bsExclude.set (k);
              break;
            }
          }
        }
        this.bsMolecule.set (k);
      }
      return true;
    }
  }
  return false;
}, "~B");
|
| 1080 |
+
/**
 * Test whether atoms a and b are at the given bond distance.
 *
 * With fractional coordinates the test is delegated to
 * this.symmetry.checkDistance. Otherwise the direct distance d is used:
 * for dx > 0 the test is |d - distance| <= dx; for dx <= 0 it is
 * 0.1 < d <= distance.
 *
 * @param a        first atom
 * @param b        second atom
 * @param distance target (or maximum) distance
 * @param dx       tolerance; not greater than 0 selects the maximum-distance test
 * @return true when the pair passes the test
 */
Clazz.defineMethod (c$, "checkBondDistance",
function (a, b, distance, dx) {
  if (this.iHaveFractionalCoordinates) {
    return this.symmetry.checkDistance (a, b, distance, dx, 0, 0, 0, this.ptOffset);
  }
  var d = a.distance (b);
  if (dx > 0) {
    return Math.abs (d - distance) <= dx;
  }
  return d <= distance && d > 0.1;
}, "J.adapter.smarter.Atom,J.adapter.smarter.Atom,~N,~N");
|
| 1086 |
+
/**
 * Add a bond between atoms i and j to the atom set collection and, for
 * molecular models, record the connection in both atoms' bsConnected sets.
 *
 * @param i     absolute index of the first atom
 * @param j     absolute index of the second atom
 * @param order bond order code
 */
Clazz.defineMethod (c$, "addNewBond",
function (i, j, order) {
  this.asc.addNewBondWithOrder (i, j, order);
  if (this.isMolecular) {
    var connected = this.bsConnected;
    connected[i].set (j);
    connected[j].set (i);
  }
}, "~N,~N,~N");
|
| 1093 |
+
/**
 * Add to bs the atom iatom and every atom reachable from it through the
 * connection sets in bsBonds, not expanding through atoms that were already
 * set in bs on entry (other than iatom itself).
 *
 * Implemented with an explicit work stack rather than recursion: the
 * recursive form could nest as deep as the connected component is large
 * and exhaust the call stack on big structures. The resulting bit set is
 * the same.
 *
 * @param atoms   atom array (not read here; kept for the existing signature)
 * @param iatom   index of the starting atom
 * @param bsBonds array of connection bit sets, indexed by atom
 * @param bs      bit set that receives the connected atoms
 */
Clazz.defineMethod (c$, "setBs",
function (atoms, iatom, bsBonds, bs) {
  bs.set (iatom);
  var pending = [iatom];
  while (pending.length > 0) {
    var bsBond = bsBonds[pending.pop ()];
    for (var i = bsBond.nextSetBit (0); i >= 0; i = bsBond.nextSetBit (i + 1)) {
      if (bs.get (i)) continue;
      // Mark on discovery so each atom is queued at most once.
      bs.set (i);
      pending.push (i);
    }
  }
}, "~A,~N,~A,JU.BS");
|
| 1101 |
+
/**
 * Report the current value of this.doCheckUnitCell.
 *
 * @return this.doCheckUnitCell, unchanged
 */
Clazz.defineMethod (c$, "checkSubclassSymmetry",
function () {
  var doCheck = this.doCheckUnitCell;
  return doCheck;
});
|
| 1105 |
+
/**
 * Check that the leading fields of a field list were all found in the
 * current loop (their this.key2col entry is not -1).
 *
 * @param keys     the field names, used for the count and the warning text
 * @param lastKey  number of leading fields to check; a negative value
 *                 checks all of keys
 * @param critical when true, a missing field is logged as a warning
 * @return false at the first missing field (scanning from the highest
 *         index down), true when none is missing
 */
Clazz.defineMethod (c$, "checkAllFieldsPresent",
function (keys, lastKey, critical) {
  var idx = (lastKey < 0 ? keys.length : lastKey);
  while (--idx >= 0) {
    if (this.key2col[idx] != -1) continue;
    if (critical) JU.Logger.warn ("CIF reader missing property: " + keys[idx]);
    return false;
  }
  return true;
}, "~A,~N,~B");
|
| 1113 |
+
/**
 * Return the text of the given field in the current row.
 *
 * @param type field index into this.key2col
 * @return the column data, or the string "\0" when the field has no column
 */
Clazz.defineMethod (c$, "getField",
function (type) {
  var col = this.key2col[type];
  if (col == -1) return "\0";
  return this.parser.getColumnData (col);
}, "~N");
|
| 1118 |
+
/**
 * Test whether a value is the "\0" string that getField returns for a
 * field with no column.
 *
 * @param key the value to test
 * @return true when key equals "\0"
 */
Clazz.defineMethod (c$, "isNull",
function (key) {
  var isMissing = key.equals ("\0");
  return isMissing;
}, "~S");
|
| 1122 |
+
// Static tables for title records, transform matrix fields, _atom_type
// fields and atom-site fields.
//
// The numeric atom constants (TYPE_SYMBOL .. SEQ_ID) are laid out so that
// each value equals the position of the corresponding name in atomFields;
// NONE (-1) is the "no field" value. Names beginning with "*" are presumably
// completed with the FAMILY_ATOM prefix by the loop-parameter parser, which
// is not shown here -- TODO confirm.
Clazz.defineStatics (c$,
  "titleRecords", "__citation_title__publ_section_title__active_magnetic_irreps_details__",
  "TransformFields", Clazz.newArray (-1, [
    "x[1][1]", "x[1][2]", "x[1][3]", "r[1]",
    "x[2][1]", "x[2][2]", "x[2][3]", "r[2]",
    "x[3][1]", "x[3][2]", "x[3][3]", "r[3]"]),

  // --- _atom_type fields ---
  "ATOM_TYPE_SYMBOL", 0,
  "ATOM_TYPE_OXIDATION_NUMBER", 1,
  "atomTypeFields", Clazz.newArray (-1, ["_atom_type_symbol", "_atom_type_oxidation_number"]),

  // --- atom-site field indices ---
  "NONE", -1,
  "TYPE_SYMBOL", 0,
  "LABEL", 1,
  "AUTH_ATOM", 2,
  "FRACT_X", 3,
  "FRACT_Y", 4,
  "FRACT_Z", 5,
  "CARTN_X", 6,
  "CARTN_Y", 7,
  "CARTN_Z", 8,
  "OCCUPANCY", 9,
  "B_ISO", 10,
  "COMP_ID", 11,
  "AUTH_ASYM_ID", 12,
  "AUTH_SEQ_ID", 13,
  "INS_CODE", 14,
  "ALT_ID", 15,
  "GROUP_PDB", 16,
  "MODEL_NO", 17,
  "DUMMY_ATOM", 18,
  "DISORDER_GROUP", 19,
  "ANISO_LABEL", 20,
  "ANISO_MMCIF_ID", 21,
  "ANISO_U11", 22,
  "ANISO_U22", 23,
  "ANISO_U33", 24,
  "ANISO_U12", 25,
  "ANISO_U13", 26,
  "ANISO_U23", 27,
  "ANISO_MMCIF_U11", 28,
  "ANISO_MMCIF_U22", 29,
  "ANISO_MMCIF_U33", 30,
  "ANISO_MMCIF_U12", 31,
  "ANISO_MMCIF_U13", 32,
  "ANISO_MMCIF_U23", 33,
  "U_ISO_OR_EQUIV", 34,
  "ANISO_B11", 35,
  "ANISO_B22", 36,
  "ANISO_B33", 37,
  "ANISO_B12", 38,
  "ANISO_B13", 39,
  "ANISO_B23", 40,
  "ANISO_BETA_11", 41,
  "ANISO_BETA_22", 42,
  "ANISO_BETA_33", 43,
  "ANISO_BETA_12", 44,
  "ANISO_BETA_13", 45,
  "ANISO_BETA_23", 46,
  "ADP_TYPE", 47,
  "CC_COMP_ID", 48,
  "CC_ATOM_ID", 49,
  "CC_ATOM_SYM", 50,
  "CC_ATOM_CHARGE", 51,
  "CC_ATOM_X", 52,
  "CC_ATOM_Y", 53,
  "CC_ATOM_Z", 54,
  "CC_ATOM_X_IDEAL", 55,
  "CC_ATOM_Y_IDEAL", 56,
  "CC_ATOM_Z_IDEAL", 57,
  "DISORDER_ASSEMBLY", 58,
  "ASYM_ID", 59,
  "SUBSYS_ID", 60,
  "SITE_MULT", 61,
  "THERMAL_TYPE", 62,
  "MOMENT_LABEL", 63,
  "MOMENT_PRELIM_X", 64,
  "MOMENT_PRELIM_Y", 65,
  "MOMENT_PRELIM_Z", 66,
  "MOMENT_X", 67,
  "MOMENT_Y", 68,
  "MOMENT_Z", 69,
  "ATOM_ID", 70,
  "SEQ_ID", 71,
  "FAMILY_ATOM", "_atom_site",

  // --- atom-site field names, in index order (0..71) ---
  "atomFields", Clazz.newArray (-1, [
    // 0-2
    "*_type_symbol", "*_label", "*_auth_atom_id",
    // 3-8
    "*_fract_x", "*_fract_y", "*_fract_z",
    "*_cartn_x", "*_cartn_y", "*_cartn_z",
    // 9-19
    "*_occupancy", "*_b_iso_or_equiv", "*_auth_comp_id", "*_auth_asym_id",
    "*_auth_seq_id", "*_pdbx_pdb_ins_code", "*_label_alt_id", "*_group_pdb",
    "*_pdbx_pdb_model_num", "*_calc_flag", "*_disorder_group",
    // 20-21
    "*_aniso_label", "*_anisotrop_id",
    // 22-27
    "*_aniso_u_11", "*_aniso_u_22", "*_aniso_u_33",
    "*_aniso_u_12", "*_aniso_u_13", "*_aniso_u_23",
    // 28-33
    "*_anisotrop_u[1][1]", "*_anisotrop_u[2][2]", "*_anisotrop_u[3][3]",
    "*_anisotrop_u[1][2]", "*_anisotrop_u[1][3]", "*_anisotrop_u[2][3]",
    // 34
    "*_u_iso_or_equiv",
    // 35-40
    "*_aniso_b_11", "*_aniso_b_22", "*_aniso_b_33",
    "*_aniso_b_12", "*_aniso_b_13", "*_aniso_b_23",
    // 41-46
    "*_aniso_beta_11", "*_aniso_beta_22", "*_aniso_beta_33",
    "*_aniso_beta_12", "*_aniso_beta_13", "*_aniso_beta_23",
    // 47
    "*_adp_type",
    // 48-57 (chemical component atoms; full names, no "*" prefix)
    "_chem_comp_atom_comp_id", "_chem_comp_atom_atom_id",
    "_chem_comp_atom_type_symbol", "_chem_comp_atom_charge",
    "_chem_comp_atom_model_cartn_x", "_chem_comp_atom_model_cartn_y",
    "_chem_comp_atom_model_cartn_z",
    "_chem_comp_atom_pdbx_model_cartn_x_ideal",
    "_chem_comp_atom_pdbx_model_cartn_y_ideal",
    "_chem_comp_atom_pdbx_model_cartn_z_ideal",
    // 58-62
    "*_disorder_assembly", "*_label_asym_id", "*_subsystem_code",
    "*_symmetry_multiplicity", "*_thermal_displace_type",
    // 63-69
    "*_moment_label",
    "*_moment_crystalaxis_mx", "*_moment_crystalaxis_my", "*_moment_crystalaxis_mz",
    "*_moment_crystalaxis_x", "*_moment_crystalaxis_y", "*_moment_crystalaxis_z",
    // 70-71
    "*_id", "*_label_seq_id"]));

// Entry 48 of atomFields is "_chem_comp_atom_comp_id" (the CC_COMP_ID slot).
c$.prototype.singleAtomID = J.adapter.readers.cif.CifReader.atomFields[48];
c$.singleAtomID = c$.prototype.singleAtomID;
|
| 1204 |
+
// Static tables for citation, symmetry-operation and geometric-bond fields.
//
// NOTE(review): most SYM_* constants equal the position of the like-named
// entry in symmetryOperationsFields, but SYM_MAGN_REV (7), SYM_MAGN_SSG_REV
// (8) and SYM_MAGN_REV_PRELIM (6) do not line up by name with entries 6-8
// ("*_magn_operation_timereversal", "*_magn_ssg_operation_timereversal",
// "*_operation_timereversal"). Values are reproduced unchanged -- confirm
// against the code that consumes them before touching either list.
Clazz.defineStatics (c$,
  // --- citation ---
  "CITATION_TITLE", 0,
  "citationFields", Clazz.newArray (-1, ["_citation_title"]),

  // --- symmetry operations ---
  "SYM_XYZ", 0,
  "SYM_MAGN_XYZ", 1,
  "SYM_SSG_ALG", 2,
  "SYM_MAGN_SSG_ALG", 3,
  "SYM_EQ_XYZ", 4,
  "SYM_SSG_EQ_XYZ", 5,
  "SYM_MAGN_REV", 7,
  "SYM_MAGN_SSG_REV", 8,
  "SYM_MAGN_REV_PRELIM", 6,
  "SYM_MAGN_CENTERING", 9,
  "SYM_MAGN_SSG_CENTERING", 10,
  "SYM_MAGN_SSG_CENT_XYZ", 11,
  "FAMILY_SGOP", "_space_group_symop",
  "symmetryOperationsFields", Clazz.newArray (-1, [
    "*_operation_xyz",                      // 0
    "*_magn_operation_xyz",                 // 1
    "*_ssg_operation_algebraic",            // 2
    "*_magn_ssg_operation_algebraic",       // 3
    "_symmetry_equiv_pos_as_xyz",           // 4
    "_symmetry_ssg_equiv_pos_as_xyz",       // 5
    "*_magn_operation_timereversal",        // 6
    "*_magn_ssg_operation_timereversal",    // 7
    "*_operation_timereversal",             // 8
    "*_magn_centering_xyz",                 // 9
    "*_magn_ssg_centering_algebraic",       // 10
    "*_magn_ssg_centering_xyz"]),           // 11

  // --- geometric bonds ---
  "GEOM_BOND_ATOM_SITE_LABEL_1", 0,
  "GEOM_BOND_ATOM_SITE_LABEL_2", 1,
  "GEOM_BOND_DISTANCE", 2,
  "CCDC_GEOM_BOND_TYPE", 3,
  "geomBondFields", Clazz.newArray (-1, [
    "_geom_bond_atom_site_label_1",
    "_geom_bond_atom_site_label_2",
    "_geom_bond_distance",
    "_ccdc_geom_bond_type"]));
|
| 1226 |
+
});
|
static/j2s/J/adapter/readers/cif/MMCifReader.js
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["J.adapter.readers.cif.CifReader"], "J.adapter.readers.cif.MMCifReader", ["java.lang.Boolean", "java.util.Hashtable", "JU.BS", "$.Lst", "$.M4", "$.P3", "$.PT", "$.SB", "J.adapter.smarter.Atom", "$.Structure", "J.c.STR", "JU.BSUtil", "$.Logger"], function () {
|
| 3 |
+
// Class declaration for J.adapter.readers.cif.MMCifReader, a subclass of
// J.adapter.readers.cif.CifReader. The function below assigns the default
// value of every instance field and then hands off to Clazz.instantialize.
c$ = Clazz.decorateAsClass (function () {
  // load-filter flags, set in initSubclass from the ASSEMBLY (or
  // BIOMOLECULE), BYCHAIN and BYSYMOP filter keys
  this.isBiomolecule = false;
  this.byChain = false;
  this.bySymop = false;

  // allocated in initSubclass only when byChain or bySymop is set
  this.chainAtomMap = null;
  this.chainAtomCounts = null;

  // list of biomolecule info maps built by addAssembly
  this.vBiomolecules = null;
  // operator id -> 4x4 matrix, filled by addMatrix for non-NCS operators
  this.htBiomts = null;
  // site id -> map holding a comma-separated "groups" string
  this.htSites = null;
  // group name -> hetero name, filled by addHetero
  this.htHetero = null;
  // component id -> bond list, consumed by setBonds
  this.htBondMap = null;

  this.assemblyIdAtoms = null;
  this.thisChain = -1;
  this.modelIndex = 0;
  this.chainSum = null;
  this.chainAtomCount = null;

  // true for two specific ranges of stateScriptVersionInt (see initSubclass)
  this.isLigandBondBug = false;
  // reference matrix created in initSubclass; addMatrix skips NCS operators
  // equal to it
  this.mident = null;
  // when true, finalizeSubclass calls sortAssemblyModels
  this.requiresSorting = false;

  // struct_conn bond list and the key string searched by setBonds
  this.structConnMap = null;
  this.structConnList = "";
  // set from the ADDBONDS filter key
  this.doSetBonds = false;
  // comma-separated model identifiers, split in sortAssemblyModels
  this.modelStrings = "";
  this.done = false;

  Clazz.instantialize (this, arguments);
}, J.adapter.readers.cif, "MMCifReader", J.adapter.readers.cif.CifReader);
|
| 29 |
+
/**
 * Initialise mmCIF-specific reader state from the load parameters and the
 * load filter.
 *
 * Recognised filter keys: ADDBONDS, BYCHAIN, BIOMOLECULE (rewritten to
 * ASSEMBLY), ASSEMBLY and, only when an assembly is requested, BYSYMOP.
 */
Clazz.overrideMethod (c$, "initSubclass",
function () {
  this.setIsPDB ();
  this.mident = JU.M4.newM4 (null);
  this.isMMCIF = true;

  // pass selected flags on to the atom set collection
  if (this.isDSSP1) {
    this.asc.setInfo ("isDSSP1", Boolean.TRUE);
  }
  if (this.htParams.containsKey ("isMutate")) {
    this.asc.setInfo ("isMutate", Boolean.TRUE);
  }

  this.doSetBonds = this.checkFilterKey ("ADDBONDS");
  this.byChain = this.checkFilterKey ("BYCHAIN");

  // BIOMOLECULE is accepted as a synonym for ASSEMBLY
  if (this.checkFilterKey ("BIOMOLECULE")) {
    this.filter = JU.PT.rep (this.filter, "BIOMOLECULE", "ASSEMBLY");
  }
  this.isBiomolecule = this.checkFilterKey ("ASSEMBLY");
  if (this.isBiomolecule) {
    this.filter = this.filter.$replace (':', ' ');
    this.bySymop = this.checkFilterKey ("BYSYMOP");
  }

  this.isCourseGrained = this.byChain || this.bySymop;
  if (this.isCourseGrained) {
    this.chainAtomMap = new java.util.Hashtable ();
    this.chainAtomCounts = new java.util.Hashtable ();
  }

  // state scripts written by versions 140204-140208 and 140304-140308
  var version = this.stateScriptVersionInt;
  var inFirstRange = (version >= 140204 && version <= 140208);
  this.isLigandBondBug = (inFirstRange || version >= 140304 && version <= 140308);
});
|
| 49 |
+
/**
 * Handle a single (non-loop) data entry that is specific to mmCIF.
 *
 * Entries in a handful of categories are routed to the loop-block processor;
 * "_rna3d" data is stashed for later, and "_dssr" is read as a JSON map from
 * the next line of the reader.
 */
Clazz.overrideMethod (c$, "processSubclassEntry",
function () {
  var loopCategories = [
    "_pdbx_struct_assembly_gen.",
    "_struct_conn.",
    "_struct_ref_seq_dif.",
    "_struct_conf.",
    "_struct_sheet_range."];
  var isLoopCategory = false;
  for (var k = 0; k < loopCategories.length && !isLoopCategory; k++) {
    isLoopCategory = this.key0.startsWith (loopCategories[k]);
  }
  if (isLoopCategory) {
    this.processSubclassLoopBlock ();
    return;
  }
  if (this.key.equals ("_rna3d")) {
    this.addedData = this.data;
    this.addedDataKey = this.key;
    return;
  }
  if (this.key.equals ("_dssr")) {
    this.dssr = this.vwr.parseJSONMap (this.reader.readLine ());
    // one further line is read and discarded
    this.reader.readLine ();
  }
});
|
| 59 |
+
/**
 * Dispatch an mmCIF loop block to its handler according to the category
 * prefix held in this.key0.
 *
 * The order of the tests matters: operator, assembly and sequence blocks are
 * always handled; the next group is skipped for coarse-grained loads; and
 * the bond-related blocks are additionally skipped when isLigandBondBug is
 * set.
 *
 * @return the handler's result, or false when no handler applies
 */
Clazz.overrideMethod (c$, "processSubclassLoopBlock",
function () {
  var category = this.key0;

  // always processed
  if (category.startsWith ("_struct_ncs_oper.")) {
    return this.processStructOperListBlock (true);
  }
  if (category.startsWith ("_pdbx_struct_oper_list.")) {
    return this.processStructOperListBlock (false);
  }
  if (category.startsWith ("_pdbx_struct_assembly_gen.")) {
    return this.processAssemblyGenBlock ();
  }
  if (category.startsWith ("_struct_ref_seq_dif.")) {
    return this.processSequence ();
  }

  // not processed for coarse-grained loads
  if (this.isCourseGrained) {
    return false;
  }
  if (category.startsWith ("_struct_site_gen.")) {
    return this.processStructSiteBlock ();
  }
  if (category.startsWith ("_chem_comp.")) {
    return this.processChemCompLoopBlock ();
  }
  if (category.startsWith ("_struct_conf.")) {
    return this.processStructConfLoopBlock ();
  }
  if (category.startsWith ("_struct_sheet_range.")) {
    return this.processStructSheetRangeLoopBlock ();
  }

  // bond blocks are ignored when isLigandBondBug is set
  if (this.isLigandBondBug) {
    return false;
  }
  if (category.startsWith ("_chem_comp_bond.")) {
    return this.processCompBondLoopBlock ();
  }
  if (category.startsWith ("_struct_conn.")) {
    return this.processStructConnLoopBlock ();
  }
  return false;
});
|
| 75 |
+
/**
 * Reorder the atoms so that they are grouped by model identifier, in the
 * order the identifiers appear in this.modelStrings.
 *
 * For each identifier, every atom set whose modelMap entry ("_" + set index)
 * matches is appended to a new atom array, and its atoms are re-tagged with
 * the index of the first matching atom set. When the collection has a
 * bsAtoms filter, only flagged atoms are copied and a new filter is built
 * for the compacted positions. Finally atomSetCount is reduced if the
 * highest atom set used is below the current count.
 */
Clazz.defineMethod (c$, "sortAssemblyModels",
function () {
  var total = this.asc.ac;
  var highestSet = -1;
  var oldAtoms = this.asc.atoms;
  var sortedAtoms = new Array (total);
  // element 0 of the split result is skipped by the loop below
  var modelIds = JU.PT.split ("," + this.modelStrings + ",", ",,");
  var bsSorted = (this.asc.bsAtoms == null ? null : JU.BS.newN (this.asc.bsAtoms.size ()));
  var nSorted = 0;

  for (var im = 1; im < modelIds.length; im++) {
    var wantedModel = modelIds[im];
    var targetSet = -1;
    for (var iSet = 0; iSet < this.asc.atomSetCount; iSet++) {
      var firstAtom = this.asc.getAtomSetAtomIndex (iSet);
      var endAtom = firstAtom + this.asc.getAtomSetAtomCount (iSet);
      var setModel = "" + this.modelMap.get ("_" + iSet);
      if (!setModel.equals (wantedModel)) {
        continue;
      }
      // the first matching atom set lends its index to the whole group
      if (targetSet < 0) {
        targetSet = iSet;
        if (iSet > highestSet) {
          highestSet = iSet;
        }
      }
      for (var ia = firstAtom; ia < endAtom; ia++) {
        if (bsSorted != null && !this.asc.bsAtoms.get (ia)) {
          continue;
        }
        if (bsSorted != null) {
          bsSorted.set (nSorted);
        }
        oldAtoms[ia].atomSetIndex = targetSet;
        sortedAtoms[nSorted++] = oldAtoms[ia];
      }
    }
  }

  this.asc.atoms = sortedAtoms;
  this.asc.bsAtoms = bsSorted;
  if (++highestSet < this.asc.atomSetCount) {
    this.asc.atomSetCount = highestSet;
  }
});
|
| 104 |
+
/**
 * Finish an mmCIF load: create chain particles, attach validation / DSSR /
 * RNA3D annotations, set hetero names and bonds, register sites, apply
 * biomolecule symmetry and, when required, sort assembly models.
 *
 * @return false when the collection holds no atoms and the load is not
 *         coarse-grained; true otherwise
 */
Clazz.overrideMethod (c$, "finalizeSubclass",
function () {
  // BYCHAIN without an assembly: one particle per chain key
  if (this.byChain && !this.isBiomolecule) {
    var chainIds = this.chainAtomMap.keySet ().iterator ();
    while (chainIds.hasNext ()) {
      this.createParticle (chainIds.next ());
    }
  }

  var haveBiomolecule = (this.isBiomolecule && this.vBiomolecules != null && this.vBiomolecules.size () > 0);

  if (this.isCourseGrained || this.asc.ac != this.nAtoms) {
    var haveAnnotations = (this.dssr != null || this.validation != null || this.addedData != null);
    if (haveAnnotations && !this.isCourseGrained && !this.requiresSorting) {
      var validator = (this.getInterface ("J.adapter.readers.cif.MMCifValidationParser")).set (this);
      var note = null;
      if (this.addedData != null) {
        if (this.addedDataKey.equals ("_rna3d")) {
          note = validator.finalizeRna3d (this.modelMap);
        }
      } else if (this.validation != null || this.dssr != null) {
        note = validator.finalizeValidations (this.vwr, this.modelMap);
      }
      if (note != null) {
        this.appendLoadNote (note);
      }
    }
    this.setHetero ();
    if (this.doSetBonds) {
      this.setBonds ();
    }
  } else {
    // atom count equals nAtoms on a normal (not coarse-grained) load
    this.asc.removeCurrentAtomSet ();
  }

  if (this.asc.ac == 0 && !this.isCourseGrained) {
    return false;
  }

  // remembered here because sgName is restored from it further down
  var spaceGroup = this.sgName;
  if (this.htSites != null) {
    this.addSites (this.htSites);
  }

  if (haveBiomolecule) {
    this.asc.setCurrentModelInfo ("biomolecules", this.vBiomolecules);
    this.setBiomolecules ();
    if (this.thisBiomolecule != null) {
      if (this.iHaveFractionalCoordinates) {
        this.fractionalizeCoordinates (false);
      }
      this.asc.getXSymmetry ().applySymmetryBio (this.thisBiomolecule, this.applySymmetryToBonds, this.filter);
      this.asc.xtalSymmetry = null;
    }
    // expression kept exactly as generated, including the non-short-circuit "&"
    var stillCheckUnitCell = new Boolean (this.doCheckUnitCell & (this.iHaveUnitCell && this.doApplySymmetry)).valueOf ();
    this.doCheckUnitCell = stillCheckUnitCell;
    if (this.doCheckUnitCell) {
      this.ignoreFileSpaceGroupName = true;
      this.sgName = spaceGroup;
      this.fractionalizeCoordinates (true);
      this.asc.setCurrentModelInfo ("biosymmetry", null);
      this.asc.setCurrentModelInfo ("biosymmetryCount", null);
      this.asc.checkSpecial = false;
      if (this.byChain) {
        return true;
      }
    }
  }

  if (this.latticeCells != null && this.latticeCells[0] != 0) {
    this.addJmolScript ("unitcell;axes on;axes unitcell;");
  }
  if (this.requiresSorting) {
    this.sortAssemblyModels ();
  }
  return true;
});
|
| 145 |
+
/**
 * mmCIF override of the symmetry hook: turns off checkSpecial on the atom
 * set collection and stores, under "PDB_CONECT_firstAtom_count_max", an int
 * array holding the current atom set's first atom index, its atom count and
 * this.maxSerial.
 *
 * @return always false
 */
Clazz.overrideMethod (c$, "checkSubclassSymmetry",
function () {
  this.asc.checkSpecial = false;
  var iSet = this.asc.iSet;
  var firstAtom = this.asc.getAtomSetAtomIndex (iSet);
  var atomCount = this.asc.getAtomSetAtomCount (iSet);
  var conectInfo = Clazz.newIntArray (-1, [firstAtom, atomCount, this.maxSerial]);
  this.asc.setCurrentModelInfo ("PDB_CONECT_firstAtom_count_max", conectInfo);
  return false;
});
|
| 152 |
+
/**
 * Create bonds from the component bond tables (htBondMap) and, when present,
 * from the struct_conn table (structConnMap), then append a load note with
 * the resulting bond count.
 *
 * Does nothing at all -- including the struct_conn pass and the load note --
 * when htBondMap is null.
 *
 * Atoms are walked in bit-set order. A change in the per-atom residue key
 * (vib.x when the atom has a vib, otherwise sequenceNumber) marks the start
 * of a new residue.
 */
Clazz.defineMethod (c$, "setBonds",
function () {
  if (this.htBondMap == null) {
    return;
  }
  var bsSelected = this.asc.bsAtoms;
  if (bsSelected == null) {
    bsSelected = JU.BSUtil.newBitSet2 (0, this.asc.ac);
  }
  var atoms = this.asc.atoms;
  var lastResidue = -1;
  var comp = null;
  var nameToIndex = null;

  // pass 1: bonds within each residue, from the component bond lists
  for (var i = bsSelected.nextSetBit (0); i >= 0; i = bsSelected.nextSetBit (i + 1)) {
    var atom = atoms[i];
    var residue = (atom.vib == null ? atom.sequenceNumber : atom.vib.x);
    if (residue != lastResidue) {
      lastResidue = residue;
      // flush the residue just completed before starting the next one
      if (comp != null) {
        this.processBonds (this.htBondMap.get (comp), nameToIndex, false);
      }
      nameToIndex = new java.util.Hashtable ();
      comp = atom.group3;
      if (!this.htBondMap.containsKey (comp)) {
        comp = null;
      }
    }
    if (comp != null) {
      nameToIndex.put (atom.atomName, Integer.$valueOf (atom.index));
    }
  }
  if (comp != null) {
    this.processBonds (this.htBondMap.get (comp), nameToIndex, false);
  }

  // pass 2: struct_conn bonds, keyed by chain + group + residue + atom + altLoc
  if (this.structConnMap != null) {
    nameToIndex = new java.util.Hashtable ();
    lastResidue = -1;
    comp = null;
    for (var j = bsSelected.nextSetBit (0); j >= 0; j = bsSelected.nextSetBit (j + 1)) {
      var connAtom = atoms[j];
      var connResidue = (connAtom.vib == null ? connAtom.sequenceNumber : connAtom.vib.x);
      if (connResidue != lastResidue) {
        lastResidue = connResidue;
        var residueKey = connAtom.chainID + connAtom.group3 + lastResidue;
        // only residues mentioned in structConnList are indexed
        comp = (this.structConnList.indexOf (residueKey) < 0 ? null : residueKey);
      }
      if (comp != null) {
        nameToIndex.put (comp + connAtom.atomName + connAtom.altLoc, Integer.$valueOf (connAtom.index));
      }
    }
    this.processBonds (this.structConnMap, nameToIndex, true);
  }

  this.appendLoadNote (this.asc.bondCount + " bonds added");
});
|
| 196 |
+
/**
 * Add every bond in cmap whose two end points can be resolved through map.
 *
 * @param cmap         list of bond records; element 0 and 1 are the lookup
 *                     keys of the two atoms, element 2 is the bond order
 * @param map          lookup from atom key to atom index
 * @param isStructConn only selects the label used in the debug message
 */
Clazz.defineMethod (c$, "processBonds",
function (cmap, map, isStructConn) {
  var total = cmap.size ();
  for (var k = 0; k < total; k++) {
    var bond = cmap.get (k);
    var first = map.get (bond[0]);
    if (first == null) {
      continue;
    }
    var second = map.get (bond[1]);
    if (second == null) {
      continue;
    }
    if (this.debugging) {
      JU.Logger.debug ((isStructConn ? "_struct_conn" : "_comp_bond") + " adding bond " + first + " " + second + " order=" + bond[2]);
    }
    this.asc.addNewBondWithOrder (first.intValue (), second.intValue (), (bond[2]).intValue ());
  }
}, "JU.Lst,java.util.Map,~B");
|
| 207 |
+
/**
 * Read a _struct_ref_seq_dif loop and record a field-0 -> field-1 mapping in
 * htGroup1 for every row whose field 1, lower-cased, is exactly one
 * character long. (Judging by the g1/g3 naming in the original, these are
 * presumably three-letter and one-letter residue codes -- TODO confirm
 * against structRefFields.)
 *
 * htGroup1 is created on first use and published to the atom set collection
 * under "htGroup1".
 *
 * @return always true
 */
Clazz.defineMethod (c$, "processSequence",
function () {
  this.parseLoopParameters (J.adapter.readers.cif.MMCifReader.structRefFields);
  while (this.parser.getData ()) {
    var shortCode = this.getField (1).toLowerCase ();
    if (this.isNull (shortCode) || shortCode.length != 1) {
      continue;
    }
    var longCode = this.getField (0);
    if (this.isNull (longCode)) {
      continue;
    }
    if (this.htGroup1 == null) {
      this.htGroup1 = new java.util.Hashtable ();
      this.asc.setInfo ("htGroup1", this.htGroup1);
    }
    this.htGroup1.put (longCode, shortCode);
  }
  return true;
});
|
| 219 |
+
/**
 * Read a _pdbx_struct_assembly_gen loop. For each row the values of the
 * columns mapped to properties 0, 1 and 2 are gathered into a three-element
 * array; the row is passed to addAssembly only if all three were found.
 *
 * @return always true
 */
Clazz.defineMethod (c$, "processAssemblyGenBlock",
function () {
  this.parseLoopParameters (J.adapter.readers.cif.MMCifReader.assemblyFields);
  while (this.parser.getData ()) {
    var row = new Array (3);
    var found = 0;
    var nColumns = this.parser.getColumnCount ();
    for (var col = 0; col < nColumns; ++col) {
      var prop = this.fieldProperty (col);
      if (prop === 0 || prop === 1 || prop === 2) {
        found++;
        row[prop] = this.field;
      }
    }
    if (found == 3) {
      this.addAssembly (row);
    }
  }
  return true;
});
|
| 241 |
+
/**
 * Record one assembly-generation row in the biomolecule list.
 *
 * Rows sharing an assembly id are merged into a single info map named
 * "biomolecule <id>"; each row contributes one entry to that map's
 * "assemblies" and "operators" lists. The map is then offered to
 * checkFilterAssembly.
 *
 * @param assem three-element array: [0] assembly id, [1] operator
 *              expression, [2] comma-separated list logged as ASYM_IDs
 */
Clazz.defineMethod (c$, "addAssembly",
function (assem) {
  var id = assem[0];
  var asymList = assem[2];
  var operatorExpr = assem[1];
  var name = "biomolecule " + id;
  JU.Logger.info (name + " operators " + operatorExpr + " ASYM_IDs " + asymList);
  this.appendLoadNote ("found " + name + ": " + asymList);
  if (this.vBiomolecules == null) {
    this.vBiomolecules = new JU.Lst ();
  }

  // look for an existing entry with this name, newest first
  var info = null;
  for (var k = this.vBiomolecules.size () - 1; k >= 0 && info == null; k--) {
    var candidate = this.vBiomolecules.get (k);
    if (candidate.get ("name").equals (name)) {
      info = candidate;
    }
  }

  if (info == null) {
    info = new java.util.Hashtable ();
    info.put ("name", name);
    // when parseIntStr yields -2147483648 the raw id string is stored instead
    var moleculeNumber = this.parseIntStr (id);
    info.put ("molecule", moleculeNumber == -2147483648 ? id : Integer.$valueOf (moleculeNumber));
    info.put ("biomts", new JU.Lst ());
    info.put ("chains", new JU.Lst ());
    info.put ("assemblies", new JU.Lst ());
    info.put ("operators", new JU.Lst ());
    this.vBiomolecules.addLast (info);
  }

  // "A,B" is stored as "$A$B"
  (info.get ("assemblies")).addLast ("$" + asymList.$replace (',', '$'));
  (info.get ("operators")).addLast (this.decodeAssemblyOperators (operatorExpr));
  this.checkFilterAssembly (id, info);
}, "~A");
|
| 269 |
+
/**
 * Select info as the active biomolecule when the load filter contains
 * "ASSEMBLY <id>;" or "ASSEMBLY=<id>;".
 *
 * @param id   assembly id
 * @param info biomolecule info map for that id
 */
Clazz.defineMethod (c$, "checkFilterAssembly",
function (id, info) {
  var requested = this.checkFilterKey ("ASSEMBLY " + id + ";") || this.checkFilterKey ("ASSEMBLY=" + id + ";");
  if (requested) {
    this.thisBiomolecule = info;
  }
}, "~S,java.util.Map");
|
| 273 |
+
/**
 * Turn an assembly operator expression into a plain comma-separated list.
 *
 * - An expression containing ")(" is split at the first such point; both
 *   halves are decoded recursively and combined with crossBinary.
 * - A parenthesised expression loses its spaces and its first and last
 *   character. If it contains "-", it is first expanded by building a bit
 *   set from it ("-" becomes ":", "," becomes " ") and taking that bit set's
 *   JSON form (presumably a bracketed list such as "[1,2,3]" -- TODO
 *   confirm against JU.BS.toJSON).
 * - Anything else is returned unchanged.
 *
 * @param ops operator expression
 * @return decoded operator list
 */
Clazz.defineMethod (c$, "decodeAssemblyOperators",
function (ops) {
  var splitAt = ops.indexOf (")(");
  if (splitAt >= 0) {
    var left = this.decodeAssemblyOperators (ops.substring (0, splitAt + 1));
    var right = this.decodeAssemblyOperators (ops.substring (splitAt + 1));
    return this.crossBinary (left, right);
  }
  if (!ops.startsWith ("(")) {
    return ops;
  }
  var text = ops;
  if (text.indexOf ("-") >= 0) {
    var inner = text.substring (1, text.length - 1).$replace ('-', ':').$replace (',', ' ');
    text = JU.BS.unescape ("({" + inner + "})").toJSON ();
  }
  text = JU.PT.rep (text, " ", "");
  return text.substring (1, text.length - 1);
}, "~S");
|
| 283 |
+
/**
 * Form every left|right pairing of two comma-separated operator lists.
 *
 * For "a,b" and "x,y" the result is "a|x,a|y,b|x,b|y".
 *
 * @param ops1 comma-separated left-hand operators
 * @param ops2 comma-separated right-hand operators
 * @return comma-separated list of "left|right" pairs, left-major order
 */
Clazz.defineMethod (c$, "crossBinary",
function (ops1, ops2) {
  var pairs = new JU.SB ();
  var leftOps = JU.PT.split (ops1, ",");
  var rightOps = JU.PT.split (ops2, ",");
  for (var iLeft = 0; iLeft < leftOps.length; iLeft++) {
    var left = leftOps[iLeft];
    for (var iRight = 0; iRight < rightOps.length; iRight++) {
      pairs.append (",").append (left).append ("|").append (rightOps[iRight]);
    }
  }
  // drop the leading comma
  return pairs.toString ().substring (1);
}, "~S,~S");
|
| 293 |
+
/**
 * Read a _struct_ncs_oper or _pdbx_struct_oper_list loop and register one
 * 4x4 matrix per usable row through addMatrix.
 *
 * Column properties: -1 is ignored, 12 is the operator id, 13 is an operator
 * string, and every other value is used directly as the index into the
 * 16-element matrix array. A row is used when it has an id and either
 * exactly 12 matrix values or an operator string together with a symmetry
 * object; in the latter case the matrix is obtained from the string and
 * elements 3, 7 and 11 are multiplied by unit-cell info values 0, 1 and 2
 * divided by 12.
 *
 * The matrix array is allocated once and reused for every row.
 *
 * @param isNCS true for _struct_ncs_oper, false for _pdbx_struct_oper_list
 * @return always true
 */
Clazz.defineMethod (c$, "processStructOperListBlock",
function (isNCS) {
  this.parseLoopParametersFor ((isNCS ? "_struct_ncs_oper" : "_pdbx_struct_oper_list"), isNCS ? J.adapter.readers.cif.MMCifReader.ncsoperFields : J.adapter.readers.cif.MMCifReader.operFields);
  var values = Clazz.newFloatArray (16, 0);
  values[15] = 1;
  while (this.parser.getData ()) {
    var nValues = 0;
    var id = null;
    var xyz = null;
    var nColumns = this.parser.getColumnCount ();
    for (var col = 0; col < nColumns; ++col) {
      var prop = this.fieldProperty (col);
      if (prop === -1) {
        continue;
      }
      if (prop === 12) {
        id = this.field;
      } else if (prop === 13) {
        xyz = this.field;
      } else {
        values[prop] = this.parseFloatStr (this.field);
        ++nValues;
      }
    }
    var usable = (id != null && (nValues == 12 || xyz != null && this.symmetry != null));
    if (!usable) {
      continue;
    }
    JU.Logger.info ((isNCS ? "noncrystallographic symmetry operator " : "assembly operator ") + id + " " + xyz);
    var matrix = new JU.M4 ();
    if (nValues != 12) {
      this.symmetry.getMatrixFromString (xyz, values, false, 0);
      values[3] *= this.symmetry.getUnitCellInfoType (0) / 12;
      values[7] *= this.symmetry.getUnitCellInfoType (1) / 12;
      values[11] *= this.symmetry.getUnitCellInfoType (2) / 12;
    }
    matrix.setA (values);
    this.addMatrix (id, matrix, isNCS);
  }
  return true;
}, "~B");
|
| 332 |
+
/**
 * Store an operator matrix.
 *
 * Non-NCS matrices are kept in htBiomts under their id. NCS matrices equal
 * to this.mident are dropped; the others get m33 set to 0 and are appended
 * to lstNCS. Both containers are created on first use.
 *
 * @param id    operator id (used only for non-NCS matrices)
 * @param m4    the matrix; modified in place for NCS operators
 * @param isNCS true when the matrix is a noncrystallographic operator
 */
Clazz.defineMethod (c$, "addMatrix",
function (id, m4, isNCS) {
  if (!isNCS) {
    if (this.htBiomts == null) {
      this.htBiomts = new java.util.Hashtable ();
    }
    this.htBiomts.put (id, m4);
    return;
  }
  if (m4.equals (this.mident)) {
    return;
  }
  m4.m33 = 0;
  if (this.lstNCS == null) {
    this.lstNCS = new JU.Lst ();
  }
  this.lstNCS.addLast (m4);
}, "~S,JU.M4,~B");
|
| 343 |
+
/**
 * Read a _chem_comp loop and pass every row that has both field 0 and
 * field 1 to addHetero, with checking and load notes enabled.
 *
 * @return always true
 */
Clazz.defineMethod (c$, "processChemCompLoopBlock",
function () {
  this.parseLoopParameters (J.adapter.readers.cif.MMCifReader.chemCompFields);
  while (this.parser.getData ()) {
    var groupName = this.getField (0);
    if (this.isNull (groupName)) {
      continue;
    }
    var hetName = this.getField (1);
    if (this.isNull (hetName)) {
      continue;
    }
    this.addHetero (groupName, hetName, true, true);
  }
  return true;
});
|
| 352 |
+
/**
 * Register a hetero group name in htHetero (created on first use).
 *
 * @param groupName group identifier used as the map key
 * @param hetName   descriptive name stored for the group
 * @param doCheck   when true, the group is skipped unless the viewer's JBR
 *                  reports it as hetero, and an existing entry is not
 *                  overwritten
 * @param addNote   when true, "<groupName> = <hetName>" is appended to the
 *                  load note
 */
Clazz.defineMethod (c$, "addHetero",
function (groupName, hetName, doCheck, addNote) {
  if (doCheck) {
    if (!this.vwr.getJBR ().isHetero (groupName)) {
      return;
    }
  }
  if (this.htHetero == null) {
    this.htHetero = new java.util.Hashtable ();
  }
  if (doCheck) {
    if (this.htHetero.containsKey (groupName)) {
      return;
    }
  }
  this.htHetero.put (groupName, hetName);
  if (addNote) {
    this.appendLoadNote (groupName + " = " + hetName);
  }
}, "~S,~S,~B,~B");
|
| 360 |
+
/**
 * Read a _struct_conf loop and add one Structure per row.
 *
 * The loop is skipped when structures are being ignored or when any of the
 * expected fields is missing. Each row starts out as a helix: a type
 * beginning with "TURN" makes it a turn, a type beginning with "HELX" takes
 * its helix subtype from field 9, and any other type is marked NONE. Field 8
 * supplies the serial id and field 7 the structure id; the residue range is
 * filled in by addStructure.
 *
 * @return true if the loop was processed, false if it was skipped
 */
Clazz.defineMethod (c$, "processStructConfLoopBlock",
function () {
  if (this.ignoreStructure) {
    this.parser.skipLoop (false);
    return false;
  }
  var fields = J.adapter.readers.cif.MMCifReader.structConfFields;
  this.parseLoopParametersFor ("_struct_conf", fields);
  if (!this.checkAllFieldsPresent (fields, -1, true)) {
    this.parser.skipLoop (true);
    return false;
  }
  while (this.parser.getData ()) {
    var structure = new J.adapter.smarter.Structure (-1, J.c.STR.HELIX, J.c.STR.HELIX, null, 0, 0, null);
    var confType = this.getField (0);
    if (confType.startsWith ("TURN")) {
      structure.substructureType = J.c.STR.TURN;
      structure.structureType = structure.substructureType;
    } else if (confType.startsWith ("HELX")) {
      structure.substructureType = J.adapter.smarter.Structure.getHelixType (this.parseIntStr (this.getField (9)));
    } else {
      structure.substructureType = J.c.STR.NONE;
      structure.structureType = structure.substructureType;
    }
    structure.serialID = this.parseIntStr (this.getField (8));
    structure.structureID = this.getField (7);
    this.addStructure (structure);
  }
  return true;
});
|
| 381 |
+
/**
 * Fill in the start and end residue of a structure from the current row and
 * add it to the atom set collection.
 *
 * Fields 1-3 give the start chain, sequence number and insertion code;
 * fields 4-6 give the same for the end. Only the first character of each
 * insertion-code field is used.
 *
 * @param structure the structure to complete and register
 */
Clazz.defineMethod (c$, "addStructure",
function (structure) {
  var startChain = this.getField (1);
  structure.startChainStr = startChain;
  structure.startChainID = this.vwr.getChainID (startChain, true);
  structure.startSequenceNumber = this.parseIntStr (this.getField (2));
  structure.startInsertionCode = this.getField (3).charAt (0);

  var endChain = this.getField (4);
  structure.endChainStr = endChain;
  structure.endChainID = this.vwr.getChainID (endChain, true);
  structure.endSequenceNumber = this.parseIntStr (this.getField (5));
  structure.endInsertionCode = this.getField (6).charAt (0);

  this.asc.addStructure (structure);
}, "J.adapter.smarter.Structure");
|
| 391 |
+
/**
 * Read a _struct_sheet_range loop and add one SHEET structure per row, built
 * from field 0 and the integer value of field 7.
 *
 * The loop is skipped when structures are being ignored or when any of the
 * expected fields is missing.
 *
 * @return true if the loop was processed, false if it was skipped
 */
Clazz.defineMethod (c$, "processStructSheetRangeLoopBlock",
function () {
  if (this.ignoreStructure) {
    this.parser.skipLoop (false);
    return false;
  }
  var fields = J.adapter.readers.cif.MMCifReader.structSheetRangeFields;
  this.parseLoopParametersFor ("_struct_sheet_range", fields);
  if (!this.checkAllFieldsPresent (fields, -1, true)) {
    this.parser.skipLoop (true);
    return false;
  }
  while (this.parser.getData ()) {
    var sheetId = this.getField (0);
    var serial = this.parseIntStr (this.getField (7));
    var sheet = new J.adapter.smarter.Structure (-1, J.c.STR.SHEET, J.c.STR.SHEET, sheetId, serial, 1, null);
    this.addStructure (sheet);
  }
  return true;
});
|
| 404 |
+
/**
 * Read a _struct_site_gen loop into htSites, which is replaced by a fresh
 * table on every call.
 *
 * Each site id (field 0) maps to a table whose "groups" entry is a
 * comma-separated list of residue labels of the form
 * "[resID]seqNum^insCode:chainID"; the "^insCode" and ":chainID" parts are
 * left out when the corresponding field is absent. Rows lacking field 3
 * (sequence number) or field 1 (residue id) are skipped.
 *
 * @return always true
 */
Clazz.defineMethod (c$, "processStructSiteBlock",
function () {
  this.parseLoopParametersFor ("_struct_site_gen", J.adapter.readers.cif.MMCifReader.structSiteFields);
  this.htSites = new java.util.Hashtable ();
  while (this.parser.getData ()) {
    var seqNum = this.getField (3);
    if (this.isNull (seqNum)) {
      continue;
    }
    var resID = this.getField (1);
    if (this.isNull (resID)) {
      continue;
    }
    var siteID = this.getField (0);
    var site = this.htSites.get (siteID);
    if (site == null) {
      site = new java.util.Hashtable ();
      site.put ("groups", "");
      this.htSites.put (siteID, site);
    }
    var insCode = this.getField (4);
    var chainID = this.getField (2);

    var label = "[" + resID + "]" + seqNum;
    if (!this.isNull (insCode)) {
      label += "^" + insCode;
    }
    if (!this.isNull (chainID)) {
      label += ":" + chainID;
    }

    var groups = site.get ("groups");
    if (groups.length != 0) {
      groups += ",";
    }
    groups += label;
    site.put ("groups", groups);
  }
  return true;
});
|
| 428 |
+
/**
 * Run setBiomolecule over every entry of vBiomolecules (last to first),
 * collecting the atoms of the current biomolecule into one bit set, and
 * then restrict the collection's atom set to those atoms when loading a
 * biomolecule that does not cover every atom.
 *
 * Does nothing when neither assemblyIdAtoms nor chainAtomCounts is set.
 */
Clazz.defineMethod(c$, "setBiomolecules",
function () {
  if (this.assemblyIdAtoms == null && this.chainAtomCounts == null) return;

  var bsSelected = new JU.BS();
  var index = this.vBiomolecules.size();
  while (--index >= 0) {
    var bio = this.vBiomolecules.get(index);
    // Only the current biomolecule contributes to the selection bit set.
    var bsTarget = (bio === this.thisBiomolecule ? bsSelected : null);
    this.setBiomolecule(bio, bsTarget);
  }

  if (!this.isBiomolecule) return;
  if (!(bsSelected.cardinality() < this.asc.ac)) return;

  if (this.asc.bsAtoms != null) {
    this.asc.bsAtoms.and(bsSelected);
  } else if (!this.isCourseGrained) {
    this.asc.bsAtoms = bsSelected;
  }
});
|
| 440 |
+
/**
 * Resolve one biomolecule's operator/assembly lists into transformation
 * matrices and chain lists, and count the atoms it will contain.
 *
 * For each operator entry the assembly ids (split on "$", first element
 * skipped) are turned into a ":id;" chain list. Atoms are counted either
 * from assemblyIdAtoms bit sets or, for coarse-grained biomolecules, from
 * per-chain particles. When loading as a biomolecule, each operator's
 * matrix is stored in "biomts" with its chain list in "chains"; the
 * identity matrix goes to the front, all others to the end.
 *
 * @param biomolecule map holding "chains", "biomts", "operators" and
 *                    "assemblies"; "atomCount" is written back
 * @param bsAll       bit set to receive every atom touched, or null
 * @return total atom count, or 0 if an operator matrix cannot be found
 */
Clazz.defineMethod(c$, "setBiomolecule",
function (biomolecule, bsAll) {
  var chainLists = biomolecule.get("chains");
  var matrices = biomolecule.get("biomts");
  var operators = biomolecule.get("operators");
  var assemblies = biomolecule.get("assemblies");
  var centerSum = new JU.P3();
  var centerCount = 0;
  var bsAtoms = new JU.BS();
  var total = 0;
  var isBioCourse = (this.isBiomolecule && this.isCourseGrained);

  for (var iOp = operators.size(); --iOp >= 0;) {
    var ops = JU.PT.split(operators.get(iOp), ",");
    var ids = JU.PT.split(assemblies.get(iOp), "$");
    var chainlist = "";
    var nAtoms = 0;

    // ids[0] is skipped; the loop starts at the first real assembly id.
    for (var iId = 1; iId < ids.length; iId++) {
      var id = ids[iId];
      chainlist += ":" + id + ";";
      if (this.assemblyIdAtoms != null) {
        // NOTE(review): key is spelled "asemblyIdAtoms" (single s); kept
        // as-is because it is a runtime key.
        biomolecule.put("asemblyIdAtoms", this.assemblyIdAtoms);
        var bs = this.assemblyIdAtoms.get(id);
        if (bs != null) {
          bsAtoms.or(bs);
          if (bsAll != null) bsAll.or(bs);
          nAtoms += bs.cardinality();
        }
      } else if (isBioCourse) {
        var chainSum = this.chainAtomMap.get(id);
        if (chainSum != null) {
          if (this.bySymop) {
            // Accumulate for a single particle per symmetry operation.
            centerSum.add(chainSum);
            centerCount += this.chainAtomCounts.get(id)[0];
          } else {
            this.createParticle(id);
            nAtoms++;
          }
        }
      }
    }

    if (!this.isBiomolecule) continue;

    for (var k = 0; k < ops.length; k++) {
      var m = this.getOpMatrix(ops[k]);
      if (m == null) return 0;
      if (m.equals(this.mident)) {
        matrices.add(0, this.mident);
        chainLists.add(0, chainlist);
      } else {
        matrices.addLast(m);
        chainLists.addLast(chainlist);
      }
    }

    if (this.bySymop && bsAll != null) {
      // One averaged particle stands in for the whole assembly.
      // NOTE(review): centerCount may be 0 here if no chain sums were
      // found; the division is left unguarded as in the original.
      nAtoms = 1;
      var particle = new J.adapter.smarter.Atom();
      particle.setT(centerSum);
      particle.scale(1 / centerCount);
      particle.radius = 16;
      this.asc.addAtom(particle);
    }

    nAtoms *= ops.length;
    total += nAtoms;
  }

  biomolecule.put("atomCount", Integer.$valueOf(total));
  return total;
}, "java.util.Map,JU.BS");
|
| 500 |
+
/**
 * Add a single "Pt" atom of radius 16 at the average position of the
 * chain with the given id, using the coordinate sum in chainAtomMap and
 * the atom count in chainAtomCounts.
 *
 * @param id chain/assembly id used as the key into both maps
 */
Clazz.defineMethod(c$, "createParticle",
function (id) {
  var coordSum = this.chainAtomMap.get(id);
  var nAtoms = this.chainAtomCounts.get(id)[0];

  var particle = new J.adapter.smarter.Atom();
  particle.setT(coordSum);
  particle.scale(1 / nAtoms);   // sum -> mean position
  particle.elementSymbol = "Pt";
  this.setChainID(particle, id);
  particle.radius = 16;

  this.asc.addAtom(particle);
}, "~S");
|
| 512 |
+
/**
 * Look up the matrix for an operator expression.
 *
 * A plain id is looked up directly in htBiomts. An expression of the
 * form "a|b" yields a new matrix built from a and multiplied by b. When
 * htBiomts is not set, the result of JU.M4.newM4(null) is returned.
 *
 * @param ops operator id, or two ids joined by "|"
 * @return the matrix, or whatever htBiomts.get returns for an unknown id
 */
Clazz.defineMethod(c$, "getOpMatrix",
function (ops) {
  if (this.htBiomts == null) return JU.M4.newM4(null);

  var bar = ops.indexOf("|");
  if (bar < 0) return this.htBiomts.get(ops);

  // Composite operator: copy the left matrix, then multiply by the right.
  var product = JU.M4.newM4(this.htBiomts.get(ops.substring(0, bar)));
  product.mul(this.htBiomts.get(ops.substring(bar + 1)));
  return product;
}, "~S");
|
| 522 |
+
/**
 * Read a _struct_conn loop and record the covalent, disulfide and metal
 * connections it lists.
 *
 * A row is used only when both partners carry the same symmetry field
 * and that field is either null or "1_555". Each accepted row adds a
 * [key1, key2, order] entry to structConnMap and appends any unseen key
 * to the structConnList string.
 *
 * @return always true
 */
Clazz.defineMethod(c$, "processStructConnLoopBlock",
function () {
  this.parseLoopParametersFor("_struct_conn", J.adapter.readers.cif.MMCifReader.structConnFields);

  while (this.parser.getData()) {
    var sym1 = this.getField(5);
    var sym2 = this.getField(11);
    if (!sym1.equals(sym2)) continue;
    if (!this.isNull(sym1) && !sym1.equals("1_555")) continue;

    var type = this.getField(12);
    var isWanted = (type.startsWith("covale") || type.equals("disulf") || type.equals("metalc"));
    if (!isWanted) continue;

    if (this.htBondMap == null) this.htBondMap = new java.util.Hashtable();

    // Keys are chainID + comp + seq + atom + alt, read in that order.
    var chain1 = this.vwr.getChainID(this.getField(0), true);
    var comp1 = this.getField(2);
    var seq1 = this.parseFloatStr(this.getField(1));
    var atom1 = this.getField(3);
    var alt1 = this.getField(4);
    var key1 = chain1 + comp1 + seq1 + atom1 + alt1;

    var chain2 = this.vwr.getChainID(this.getField(6), true);
    var comp2 = this.getField(8);
    var seq2 = this.parseFloatStr(this.getField(7));
    var atom2 = this.getField(9);
    var alt2 = this.getField(10);
    var key2 = chain2 + comp2 + seq2 + atom2 + alt2;

    var order = this.getBondOrder(this.getField(13));

    if (this.structConnMap == null) this.structConnMap = new JU.Lst();
    this.structConnMap.addLast(Clazz.newArray(-1, [key1, key2, Integer.$valueOf(order)]));

    if (this.structConnList.indexOf(key1) < 0) this.structConnList += key1;
    if (this.structConnList.indexOf(key2) < 0) this.structConnList += key2;
  }
  return true;
});
|
| 542 |
+
/**
 * Read a _chem_comp_bond loop.
 *
 * Bonds flagged aromatic ('Y') have order 1 mapped to 513 and order 2
 * mapped to 514. For a ligand file the bond is added immediately by atom
 * name; otherwise, when hydrogens are present or the component is a
 * known hetero group, the bond is stored per component in htBondMap
 * (with order forced to 1 unless hydrogens are present).
 *
 * @return always true
 */
Clazz.defineMethod(c$, "processCompBondLoopBlock",
function () {
  this.doSetBonds = true;
  this.parseLoopParametersFor("_chem_comp_bond", J.adapter.readers.cif.MMCifReader.chemCompBondFields);

  while (this.parser.getData()) {
    var comp = this.getField(0);
    var atom1 = this.getField(1);
    var atom2 = this.getField(2);
    var order = this.getBondOrder(this.getField(3));

    var isAromatic = (this.getField(4).charAt(0) == 'Y');
    if (isAromatic) {
      if (order === 1) order = 513;
      else if (order === 2) order = 514;
    }

    if (this.isLigand) {
      this.asc.addNewBondWithOrderA(this.asc.getAtomFromName(atom1), this.asc.getAtomFromName(atom2), order);
      continue;
    }

    var keepForLater = (this.haveHAtoms || (this.htHetero != null && this.htHetero.containsKey(comp)));
    if (!keepForLater) continue;

    if (this.htBondMap == null) this.htBondMap = new java.util.Hashtable();
    var bonds = this.htBondMap.get(comp);
    if (bonds == null) {
      bonds = new JU.Lst();
      this.htBondMap.put(comp, bonds);
    }
    var storedOrder = (this.haveHAtoms ? order : 1);
    bonds.addLast(Clazz.newArray(-1, [atom1, atom2, Integer.$valueOf(storedOrder)]));
  }
  return true;
});
|
| 569 |
+
/**
 * Per-atom hook: either fold the atom into a running per-chain sum
 * (returning false so the caller does not add it) or record its index
 * under its assembly id (returning true).
 *
 * - Coarse-grained biomolecule: accumulate by assemblyId.
 * - Not a biomolecule but byChain: accumulate by strChain, switching the
 *   cached chainSum/chainAtomCount when the atom's chainID changes.
 * - Otherwise: set bit this.ac in assemblyIdAtoms[assemblyId] when an
 *   assembly id is given.
 *
 * @param atom       the atom being processed
 * @param assemblyId assembly id, may be null
 * @param strChain   chain name used as the by-chain key
 * @return false if the atom was absorbed into a sum, true otherwise
 */
Clazz.overrideMethod(c$, "processSubclassAtom",
function (atom, assemblyId, strChain) {
  if (this.isBiomolecule && this.isCourseGrained) {
    var assemblySum = this.chainAtomMap.get(assemblyId);
    if (assemblySum == null) {
      assemblySum = new JU.P3();
      this.chainAtomMap.put(assemblyId, assemblySum);
      this.chainAtomCounts.put(assemblyId, Clazz.newIntArray(1, 0));
    }
    this.chainAtomCounts.get(assemblyId)[0]++;
    assemblySum.add(atom);
    return false;
  }

  if (!this.isBiomolecule && this.byChain) {
    if (this.thisChain != atom.chainID) {
      this.thisChain = atom.chainID;
      this.chainSum = this.chainAtomMap.get(strChain);
      if (this.chainSum == null) {
        this.chainSum = new JU.P3();
        this.chainAtomMap.put(strChain, this.chainSum);
        this.chainAtomCount = Clazz.newIntArray(1, 0);
        this.chainAtomCounts.put(strChain, this.chainAtomCount);
      }
    }
    this.chainSum.add(atom);
    this.chainAtomCount[0]++;
    return false;
  }

  if (assemblyId != null) {
    if (this.assemblyIdAtoms == null) this.assemblyIdAtoms = new java.util.Hashtable();
    var bsAssembly = this.assemblyIdAtoms.get(assemblyId);
    if (bsAssembly == null) {
      bsAssembly = new JU.BS();
      this.assemblyIdAtoms.put(assemblyId, bsAssembly);
    }
    bsAssembly.set(this.ac);
  }
  return true;
}, "J.adapter.smarter.Atom,~S,~S");
|
| 597 |
+
/**
 * Parse the model number from the given field and start a new model if
 * it differs from the current one.
 *
 * @param modelField     index of the field holding the model number
 * @param currentModelNo model number currently being read
 * @return the parsed model number when unchanged, otherwise the result
 *         of incrementModel for the new number
 */
Clazz.overrideMethod(c$, "checkPDBModelField",
function (modelField, currentModelNo) {
  this.fieldProperty(modelField);
  var parsed = this.parseIntStr(this.field);
  if (parsed == currentModelNo) return parsed;
  return this.incrementModel(parsed);
}, "~N,~N");
|
| 603 |
+
/**
 * Begin a new model with the given file model number.
 *
 * For data sets whose name contains "-assembly-", file model numbers are
 * used and a repeated model number sets requiresSorting. Otherwise, if
 * the desired model has already been read, reading is marked done and
 * -2147483648 is returned. In all other cases a new model is started and,
 * unless skipping, modelMap gains entries in both directions
 * (model number -> set index, "_" + set index -> model number).
 *
 * @param modelNo model number read from the file
 * @return modelNo, or -2147483648 when reading should stop
 */
Clazz.defineMethod(c$, "incrementModel",
function (modelNo) {
  var dataSetName = this.thisDataSetName;
  var isAssembly = (dataSetName != null && dataSetName.indexOf("-assembly-") >= 0);

  if (isAssembly) {
    this.useFileModelNumbers = true;
    var key = "," + modelNo + ",";
    if (this.modelStrings.indexOf(key) >= 0) {
      // Same model number seen again: atoms will need sorting later.
      this.requiresSorting = true;
    } else {
      this.modelStrings += key;
    }
  } else if (this.iHaveDesiredModel && this.asc.atomSetCount > 0) {
    this.done = true;
    if (this.parser != null) {
      this.parser.skipLoop(false);
      this.skipping = false;
    }
    this.continuing = true;
    return -2147483648;
  }

  var modelNumberToUse;
  if (this.useFileModelNumbers) {
    modelNumberToUse = modelNo;
  } else {
    modelNumberToUse = ++this.modelIndex;
  }
  this.setHetero();
  this.newModel(modelNumberToUse);

  if (!this.skipping) {
    this.nextAtomSet();
    if (this.modelMap == null || this.asc.ac == 0) this.modelMap = new java.util.Hashtable();
    var setIndex = Math.max(0, this.asc.iSet);
    this.modelMap.put("" + modelNo, Integer.$valueOf(setIndex));
    this.modelMap.put("_" + setIndex, Integer.$valueOf(modelNo));
  }
  return modelNo;
}, "~N");
|
| 630 |
+
/**
 * Publish the hetero-group name table, if one exists, as "hetNames" on
 * both the current model's info and the collection-level info.
 */
Clazz.defineMethod(c$, "setHetero",
function () {
  var hetNames = this.htHetero;
  if (hetNames == null) return;
  this.asc.setCurrentModelInfo("hetNames", hetNames);
  this.asc.setInfo("hetNames", hetNames);
});
|
| 636 |
+
// Static constants for MMCifReader: for each CIF category, the field
// index constants, the category name(s), and the list of field names
// whose positions those indices refer to.
Clazz.defineStatics(c$,
  // --- _struct_ncs_oper ---
  "OPER_ID", 12,
  "OPER_XYZ", 13,
  "FAMILY_NCS_CAT", "_struct_ncs_oper.",
  "FAMILY_NCS", "_struct_ncs_oper",
  "ncsoperFields", Clazz.newArray(-1, [
    "*_matrix[1][1]", "*_matrix[1][2]", "*_matrix[1][3]", "*_vector[1]",
    "*_matrix[2][1]", "*_matrix[2][2]", "*_matrix[2][3]", "*_vector[2]",
    "*_matrix[3][1]", "*_matrix[3][2]", "*_matrix[3][3]", "*_vector[3]",
    "*_id", "*_symmetry_operation"]),

  // --- _pdbx_struct_oper_list ---
  "FAMILY_OPER_CAT", "_pdbx_struct_oper_list.",
  "FAMILY_OPER", "_pdbx_struct_oper_list",
  "operFields", Clazz.newArray(-1, [
    "*_matrix[1][1]", "*_matrix[1][2]", "*_matrix[1][3]", "*_vector[1]",
    "*_matrix[2][1]", "*_matrix[2][2]", "*_matrix[2][3]", "*_vector[2]",
    "*_matrix[3][1]", "*_matrix[3][2]", "*_matrix[3][3]", "*_vector[3]",
    "*_id", "*_symmetry_operation"]),

  // --- _pdbx_struct_assembly_gen ---
  "ASSEM_ID", 0,
  "ASSEM_OPERS", 1,
  "ASSEM_LIST", 2,
  "FAMILY_ASSEM_CAT", "_pdbx_struct_assembly_gen.",
  "assemblyFields", Clazz.newArray(-1, [
    "_pdbx_struct_assembly_gen_assembly_id",
    "_pdbx_struct_assembly_gen_oper_expression",
    "_pdbx_struct_assembly_gen_asym_id_list"]),

  // --- _struct_ref_seq_dif ---
  "FAMILY_SEQUENCEDIF_CAT", "_struct_ref_seq_dif.",
  "STRUCT_REF_G3", 0,
  "STRUCT_REF_G1", 1,
  "structRefFields", Clazz.newArray(-1, [
    "_struct_ref_seq_dif_mon_id",
    "_struct_ref_seq_dif_db_mon_id"]),

  // --- _chem_comp ---
  "CHEM_COMP_ID", 0,
  "CHEM_COMP_NAME", 1,
  "FAMILY_CHEMCOMP_CAT", "_chem_comp.",
  "chemCompFields", Clazz.newArray(-1, ["_chem_comp_id", "_chem_comp_name"]),

  // --- _struct_conf ---
  "CONF_TYPE_ID", 0,
  "BEG_ASYM_ID", 1,
  "BEG_SEQ_ID", 2,
  "BEG_INS_CODE", 3,
  "END_ASYM_ID", 4,
  "END_SEQ_ID", 5,
  "END_INS_CODE", 6,
  "STRUCT_ID", 7,
  "SERIAL_NO", 8,
  "HELIX_CLASS", 9,
  "FAMILY_STRUCTCONF_CAT", "_struct_conf.",
  "FAMILY_STRUCTCONF", "_struct_conf",
  "structConfFields", Clazz.newArray(-1, [
    "*_conf_type_id",
    "*_beg_auth_asym_id", "*_beg_auth_seq_id", "*_pdbx_beg_pdb_ins_code",
    "*_end_auth_asym_id", "*_end_auth_seq_id", "*_pdbx_end_pdb_ins_code",
    "*_id", "*_pdbx_pdb_helix_id", "*_pdbx_pdb_helix_class"]),

  // --- _struct_sheet_range (shares the BEG_*/END_* indices above) ---
  "SHEET_ID", 0,
  "STRAND_ID", 7,
  "FAMILY_SHEET_CAT", "_struct_sheet_range.",
  "FAMILY_SHEET", "_struct_sheet_range",
  "structSheetRangeFields", Clazz.newArray(-1, [
    "*_sheet_id",
    "*_beg_auth_asym_id", "*_beg_auth_seq_id", "*_pdbx_beg_pdb_ins_code",
    "*_end_auth_asym_id", "*_end_auth_seq_id", "*_pdbx_end_pdb_ins_code",
    "*_id"]),

  // --- _struct_site_gen ---
  "SITE_ID", 0,
  "SITE_COMP_ID", 1,
  "SITE_ASYM_ID", 2,
  "SITE_SEQ_ID", 3,
  // NOTE(review): index 4 is named INS_CODE but the field list below has
  // "*_label_alt_id" in that position.
  "SITE_INS_CODE", 4,
  "FAMILY_STRUCSITE_CAT", "_struct_site_gen.",
  "FAMILY_STRUCSITE", "_struct_site_gen",
  "structSiteFields", Clazz.newArray(-1, [
    "*_site_id", "*_auth_comp_id", "*_auth_asym_id", "*_auth_seq_id",
    "*_label_alt_id"]),

  // --- _struct_conn ---
  "STRUCT_CONN_ASYM1", 0,
  "STRUCT_CONN_SEQ1", 1,
  "STRUCT_CONN_COMP1", 2,
  "STRUCT_CONN_ATOM1", 3,
  "STRUCT_CONN_ALT1", 4,
  "STRUCT_CONN_SYMM1", 5,
  "STRUCT_CONN_ASYM2", 6,
  "STRUCT_CONN_SEQ2", 7,
  "STRUCT_CONN_COMP2", 8,
  "STRUCT_CONN_ATOM2", 9,
  "STRUCT_CONN_ALT2", 10,
  "STRUCT_CONN_SYMM2", 11,
  "STRUCT_CONN_TYPE", 12,
  "STRUCT_CONN_ORDER", 13,
  "FAMILY_STRUCTCONN_CAT", "_struct_conn.",
  "FAMILY_STRUCTCONN", "_struct_conn",
  "structConnFields", Clazz.newArray(-1, [
    "*_ptnr1_auth_asym_id", "*_ptnr1_auth_seq_id", "*_ptnr1_auth_comp_id",
    "*_ptnr1_label_atom_id", "*_pdbx_ptnr1_label_alt_id", "*_ptnr1_symmetry",
    "*_ptnr2_auth_asym_id", "*_ptnr2_auth_seq_id", "*_ptnr2_auth_comp_id",
    "*_ptnr2_label_atom_id", "*_pdbx_ptnr2_label_alt_id", "*_ptnr2_symmetry",
    "*_conn_type_id", "*_pdbx_value_order"]),

  // --- _chem_comp_bond ---
  "CHEM_COMP_BOND_ID", 0,
  "CHEM_COMP_BOND_ATOM_ID_1", 1,
  "CHEM_COMP_BOND_ATOM_ID_2", 2,
  "CHEM_COMP_BOND_VALUE_ORDER", 3,
  "CHEM_COMP_BOND_AROMATIC_FLAG", 4,
  "FAMILY_COMPBOND_CAT", "_chem_comp_bond.",
  "FAMILY_COMPBOND", "_chem_comp_bond",
  "chemCompBondFields", Clazz.newArray(-1, [
    "*_comp_id", "*_atom_id_1", "*_atom_id_2", "*_value_order",
    "*_pdbx_aromatic_flag"]));
|
| 709 |
+
});
|
static/j2s/J/adapter/readers/cif/MMCifValidationParser.js
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage("J.adapter.readers.cif");
Clazz.load(null, "J.adapter.readers.cif.MMCifValidationParser", ["java.lang.Character", "java.util.Hashtable", "JU.Lst", "$.PT", "JS.SV", "JU.Logger"], function () {

// Helper attached to a reader that maps validation / DSSR / RNA3D
// annotation data onto the atoms and residues the reader has loaded.
c$ = Clazz.decorateAsClass(function () {
  this.asResidues = false;   // set from the reader's "ASRES" filter key
  this.reader = null;        // owning reader, assigned in set()
  this.resMap = null;        // residue key -> [first atom, end atom]
  this.atomMap = null;       // atom key -> atom index within its model
  Clazz.instantialize(this, arguments);
}, J.adapter.readers.cif, "MMCifValidationParser");

Clazz.makeConstructor(c$,
function () {
});

/**
 * Attach this parser to a reader and read its "ASRES" filter setting.
 *
 * @param reader the owning AtomSetCollectionReader
 * @return this, to allow chaining
 */
Clazz.defineMethod(c$, "set",
function (reader) {
  this.reader = reader;
  this.asResidues = reader.checkFilterKey("ASRES");
  return this;
}, "J.adapter.smarter.AtomSetCollectionReader");

/**
 * Finish processing of validation data.
 *
 * If the reader holds a DSSR map, that map is fixed up by the annotation
 * parser and its result returned. Otherwise the reader's validation map
 * is catalogued against the loaded atoms, the resulting properties are
 * applied, and a summary note is stored under "_note".
 *
 * @param vwr      the viewer
 * @param modelMap model number <-> atom set index map
 * @return the note text, or null if no properties were produced
 */
Clazz.defineMethod(c$, "finalizeValidations",
function (vwr, modelMap) {
  var dssr = this.reader.dssr;
  if (dssr != null) return vwr.getAnnotationParser(true).fixDSSRJSONMap(dssr);

  this.mapAtomResIDs(modelMap);
  var svMap = this.reader.validation;
  var annotations = this.reader.vwr.getAnnotationParser(false);
  var props = annotations.catalogValidations(this.reader.vwr, svMap, this.getModelAtomIndices(), this.resMap, (this.asResidues ? null : this.atomMap), modelMap);

  var note = null;
  if (props != null && props.size() != 0) note = this.setProperties(props);
  svMap.mapPut("_note", JS.SV.newS(note));
  return note;
}, "JV.Viewer,java.util.Map");

/**
 * Finish processing of RNA3D data held in reader.addedData and store the
 * resulting map as "rna3d" in every atom set's auxiliary info.
 *
 * @param modelMap model number <-> atom set index map
 * @return the note produced by catalogStructureUnits
 */
Clazz.defineMethod(c$, "finalizeRna3d",
function (modelMap) {
  this.mapAtomResIDs(modelMap);
  var svMap = this.getRna3dMap(this.reader.addedData);
  var annotations = this.reader.vwr.getAnnotationParser(false);
  var note = annotations.catalogStructureUnits(this.reader.vwr, svMap, this.getModelAtomIndices(), this.resMap, null, modelMap);
  svMap.mapPut("_note", JS.SV.newS(note));

  var iSet = this.reader.asc.atomSetCount;
  while (--iSet >= 0) {
    this.reader.asc.getAtomSetAuxiliaryInfo(iSet).put("rna3d", svMap);
  }
  return note;
}, "java.util.Map");

/**
 * Parse a sequence of quoted (id, units) string pairs into a map keyed by
 * loop type. The first character of the id selects the type: H ->
 * "hairpinLoops", I -> "internalLoops", J -> "junctions"; anything else
 * is logged as an error and skipped. Each entry stores the integer after
 * the id's last "_" as "index" and the units string as "units".
 *
 * @param addedData the raw quoted-string data
 * @return the map wrapped by JS.SV.getVariableMap
 */
Clazz.defineMethod(c$, "getRna3dMap",
function (addedData) {
  var byType = new java.util.Hashtable();
  var next = Clazz.newIntArray(1, 0);   // parse position, updated in place
  var id = "";
  while ((id = JU.PT.getQuotedStringNext(addedData, next)).length > 0) {
    var units = JU.PT.getQuotedStringNext(addedData, next);
    var first = id.charAt(0);
    var type = "?";
    if (first === 'H') {
      type = "hairpinLoops";
    } else if (first === 'I') {
      type = "internalLoops";
    } else if (first === 'J') {
      type = "junctions";
    } else {
      JU.Logger.error("MMCif could not read: " + id + " " + units);
      continue;
    }

    var entries = byType.get(type);
    if (entries == null) {
      entries = new JU.Lst();
      byType.put(type, entries);
    }
    var entry = new java.util.Hashtable();
    entry.put("index", Integer.$valueOf(JU.PT.parseInt(id.substring(id.lastIndexOf("_") + 1))));
    entry.put("units", units);
    entries.addLast(entry);
  }
  return JS.SV.getVariableMap(byType);
}, "~S");

/**
 * Build resMap and atomMap from the reader's atoms.
 *
 * Residue keys are model_chainID_seqNo_insCode; atom keys append
 * _ATOMNAME_altloc (atom name upper-cased, alt location lower-cased).
 * resMap values are [first, end] atom offsets; atomMap values are atom
 * offsets. Offsets are relative to i0.
 *
 * NOTE(review): the model prefix is taken once from modelMap "_0", and
 * the loop bound n is replaced by the next atom set's count at the end
 * of each set — confirm this is the intended multi-model behaviour.
 *
 * @param modelMap model number <-> atom set index map
 */
Clazz.defineMethod(c$, "mapAtomResIDs",
function (modelMap) {
  var atoms = this.reader.asc.atoms;
  this.resMap = new java.util.Hashtable();
  this.atomMap = new java.util.Hashtable();
  var lastSeqNo = -1;
  var lastRange = null;
  var modelPrefix = "" + modelMap.get("_0");

  var i = 0;
  var nextModel = 1;
  var i0 = 0;
  var n = this.reader.asc.getAtomSetAtomCount(0);
  for (; i < n; i++) {
    var a = atoms[i];
    var seqNo = a.sequenceNumber;
    var resKey = modelPrefix + "_" + a.chainID + "_" + seqNo + "_" + (a.insertionCode == '\0' ? "" : "" + a.insertionCode);
    var atomKey = resKey + "_" + a.atomName.toUpperCase() + "_" + (a.altLoc == '\0' ? "" : "" + Character.toLowerCase(a.altLoc));
    var offset = Integer.$valueOf(i - i0);

    if (seqNo != lastSeqNo) {
      lastSeqNo = seqNo;
      // Close the previous residue's range, then open a new one.
      if (lastRange != null) lastRange[1] = i - i0;
      lastRange = Clazz.newIntArray(-1, [i - i0, n]);
      this.resMap.put(resKey, lastRange);
    }
    this.atomMap.put(atomKey, offset);

    if (i == n - 1) {
      i0 += n;
      n = this.reader.asc.getAtomSetAtomCount(nextModel++);
    }
  }
}, "java.util.Map");

/**
 * Return the starting atom index of every atom set, offset by the
 * reader's baseAtomIndex, with the collection's total atom count
 * appended as the final element.
 *
 * @return int array of length atomSetCount + 1
 */
Clazz.defineMethod(c$, "getModelAtomIndices",
function () {
  var starts = Clazz.newIntArray(this.reader.asc.atomSetCount + 1, 0);
  var last = starts.length - 1;
  for (var m = last; --m >= 0;) {
    starts[m] = this.reader.baseAtomIndex + this.reader.asc.getAtomSetAtomIndex(m);
  }
  starts[last] = this.reader.asc.ac;
  return starts;
});

/**
 * Apply a flat list of (key, values, model, isGroup) quadruples as atom
 * properties and build a human-readable summary.
 *
 * For each property the non-zero entries are counted — per residue when
 * isGroup is true (consecutive equal sequence numbers count once), per
 * atom otherwise — and the maximum value is reported unless it is 1.
 *
 * @param propList list laid out as key, float array, model, isGroup, ...
 * @return the summary note
 */
Clazz.defineMethod(c$, "setProperties",
function (propList) {
  var note = "Validations loaded:";
  var n = propList.size();
  var i = 0;
  while (i < n) {
    var key = propList.get(i++);
    var values = propList.get(i++);
    var model = (propList.get(i++)).intValue();
    var isGroup = (propList.get(i++)).booleanValue();

    var count = 0;
    var max = 0;
    var lastRes = -1;
    var i0 = this.reader.asc.getAtomSetAtomIndex(model);
    for (var j = values.length; --j >= 0;) {
      if (values[j] == 0) continue;
      if (isGroup) {
        var res = this.reader.asc.atoms[i0 + j].sequenceNumber;
        if (res != lastRes) {
          lastRes = res;
          count++;
        }
      } else {
        count++;
      }
      max = Math.max(values[j], max);
    }

    note += "\n property_" + key + " (" + (isGroup ? "residues: " : "atoms: ") + count + (max == 1 ? "" : ", max: " + (Clazz.floatToInt(max * 100)) / 100) + ")";
    this.reader.asc.setAtomProperties(key, values, model, isGroup);
  }
  return note;
}, "JU.Lst");
});
|
static/j2s/J/adapter/readers/cif/MMTFReader.js
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["J.adapter.readers.cif.MMCifReader"], "J.adapter.readers.cif.MMTFReader", ["java.lang.Boolean", "java.util.Hashtable", "JU.BS", "$.Lst", "$.M4", "$.MessagePackReader", "$.PT", "$.SB", "J.adapter.smarter.Atom", "$.Bond", "$.Structure", "JS.SV", "JU.Logger"], function () {
|
| 3 |
+
// MMTFReader: a reader for the binary MMTF format, declared as a
// subclass of MMCifReader. The function below sets each instance field
// to its initial value.
c$ = Clazz.decorateAsClass(function () {
  this.haveStructure = false;
  this.$pdbID = null;          // "structureId" (or "pdbId") from the file
  this.map = null;             // decoded MessagePack top-level map
  this.fileAtomCount = 0;      // "numAtoms"
  this.opCount = 0;
  this.groupModels = null;     // per-group int array
  this.groupMap = null;        // per-group int array
  this.groupDSSP = null;       // per-group int array
  this.atomGroup = null;       // per-atom group index
  this.labelAsymList = null;   // decoded "chainIdList"
  this.atomMap = null;         // file atom index -> Atom
  this.entities = null;        // "entityList"
  this.groupCount = 0;         // "numGroups"
  this.ac0 = 0;                // atom count before symmetry is applied
  this.bsStructures = null;
  this.lastGroup = 0;
  Clazz.instantialize(this, arguments);
}, J.adapter.readers.cif, "MMTFReader", J.adapter.readers.cif.MMCifReader);
|
| 22 |
+
/**
 * Overrides addHeader with an empty body: nothing is added for MMTF.
 */
Clazz.overrideMethod(c$, "addHeader",
function () {
  // intentionally empty
});
|
| 25 |
+
/**
 * Mark this reader as a binary mmCIF-style reader with Cartesian
 * coordinates, then run the standard reader setup.
 *
 * @param fullPath path of the file being read
 * @param htParams load parameters
 * @param reader   the underlying input object
 */
Clazz.overrideMethod(c$, "setup",
function (fullPath, htParams, reader) {
  // Flags must be in place before setupASCR runs.
  this.isBinary = true;
  this.isMMCIF = true;
  this.iHaveFractionalCoordinates = false;

  this.setupASCR(fullPath, htParams, reader);
}, "~S,java.util.Map,~O");
|
| 32 |
+
/**
 * Read the whole MMTF document.
 *
 * Decodes the MessagePack map, logs version/producer, records the
 * structure id and the atom/bond/group/model counts, reads the atoms,
 * then (unless coarse-grained) the inter-group bonds and secondary
 * structure, and finally symmetry and bio-assembly information.
 */
Clazz.overrideMethod(c$, "processBinaryDocument",
function () {
  var doDoubleBonds = (!this.isCourseGrained && !this.checkFilterKey("NODOUBLE"));
  this.isDSSP1 = !this.checkFilterKey("DSSP2");
  var mmtfImplementsDSSP2 = false;
  this.applySymmetryToBonds = true;

  this.map = (new JU.MessagePackReader(this.binaryDoc, true)).readMap();
  this.entities = this.map.get("entityList");
  if (JU.Logger.debugging) {
    // Dump every top-level key of the decoded map.
    var keys = this.map.keySet().iterator();
    while (keys.hasNext()) JU.Logger.info(keys.next());
  }

  this.asc.setInfo("noAutoBond", Boolean.TRUE);
  JU.Logger.info("MMTF version " + this.map.get("mmtfVersion"));
  JU.Logger.info("MMTF Producer " + this.map.get("mmtfProducer"));

  var title = this.map.get("title");
  if (title != null) this.appendLoadNote(title);

  this.$pdbID = this.map.get("structureId");
  if (this.$pdbID == null) this.$pdbID = this.map.get("pdbId");

  this.fileAtomCount = (this.map.get("numAtoms")).intValue();
  var nBonds = (this.map.get("numBonds")).intValue();
  this.groupCount = (this.map.get("numGroups")).intValue();
  this.groupModels = Clazz.newIntArray(this.groupCount, 0);
  this.groupDSSP = Clazz.newIntArray(this.groupCount, 0);
  this.groupMap = Clazz.newIntArray(this.groupCount, 0);
  var modelCount = (this.map.get("numModels")).intValue();
  this.appendLoadNote("id=" + this.$pdbID + " numAtoms=" + this.fileAtomCount + " numBonds=" + nBonds + " numGroups=" + this.groupCount + " numModels=" + modelCount);

  this.getMMTFAtoms(doDoubleBonds);

  if (!this.isCourseGrained) {
    var bondOrders = this.decode("bondOrderList");
    var bondAtoms = this.decode("bondAtomList");
    this.addMMTFBonds(bondOrders, bondAtoms, 0, doDoubleBonds, true);
    if (this.isDSSP1 || mmtfImplementsDSSP2) this.getStructure();
  }

  this.setMMTFSymmetry();
  this.getMMTFBioAssembly();
  this.setModelPDB(true);

  if (JU.Logger.debuggingHigh) JU.Logger.info(JS.SV.getVariable(this.map).asString());
});
|
| 69 |
+
/**
 * Remember the atom count before symmetry is applied, delegate to the
 * superclass implementation, and then add symmetry copies of the
 * secondary structure when structure information is present.
 */
Clazz.defineMethod(c$, "applySymmetryAndSetTrajectory",
function () {
  this.ac0 = this.ac;
  Clazz.superCall(this, J.adapter.readers.cif.MMTFReader, "applySymmetryAndSetTrajectory", []);
  if (!this.haveStructure) return;
  this.addStructureSymmetry();
});
|
| 75 |
+
/**
 * Create atoms (and intra-group bonds) from the decoded MMTF arrays.
 *
 * Groups are walked in file order. groupsPerChain and chainsPerModel are
 * used to detect when a new chain or a new model begins; a new model
 * triggers incrementModel, and reading stops if that sets this.done.
 * While this.skipping is set, a group's atoms are counted but not
 * created. Hetero groups not yet registered are added via addHetero,
 * using the owning entity's description for "NON-POLYMER" components
 * when one matches. Atoms rejected by filterAtom or processSubclassAtom
 * are not added.
 *
 * @param doMulti passed through to addMMTFBonds for each group
 */
Clazz.defineMethod(c$, "getMMTFAtoms",
function (doMulti) {
  var chainsPerModel = this.map.get("chainsPerModel");
  var groupsPerChain = this.map.get("groupsPerChain");
  this.labelAsymList = this.decode("chainIdList");
  var authAsymList = this.decode("chainNameList");
  var groupTypeList = this.decode("groupTypeList");
  var groupIdList = this.decode("groupIdList");
  var groupList = this.map.get("groupList");
  var insCodes = this.decode("insCodeList");
  var atomId = this.decode("atomIdList");
  var haveSerial = (atomId != null);
  var altloc = this.decode("altLocList");
  var occ = this.decode("occupancyList");
  var x = this.decode("xCoordList");
  var y = this.decode("yCoordList");
  var z = this.decode("zCoordList");
  var bf = this.decode("bFactorList");
  var nameList = (this.useAuthorChainID ? authAsymList : this.labelAsymList);

  // Running positions within the model / chain / group hierarchy.
  var iModel = -1;
  var iChain = 0;
  var nChain = 0;
  var iGroup = 0;
  var nGroup = 0;
  var chainpt = 0;
  var seqNo = 0;
  var iatom = 0;
  var chainID = "";
  var authAsym = "";
  var labelAsym = "";
  var insCode = '\u0000';

  this.atomMap = new Array(this.fileAtomCount);
  this.atomGroup = Clazz.newIntArray(this.fileAtomCount, 0);

  var thisGroup = -1;
  for (var j = 0; j < this.groupCount; j++) {
    if (++iGroup >= nGroup) {
      // First group of a new chain.
      chainID = nameList[chainpt];
      authAsym = authAsymList[chainpt];
      labelAsym = this.labelAsymList[chainpt];
      nGroup = groupsPerChain[chainpt++];
      iGroup = 0;
      if (++iChain >= nChain) {
        // First chain of a new model.
        this.groupModels[j] = ++iModel;
        nChain = chainsPerModel[iModel];
        iChain = 0;
        this.setModelPDB(true);
        this.incrementModel(iModel + 1);
        this.asc.setCurrentModelInfo("pdbID", this.$pdbID);
        this.nAtoms0 = this.asc.ac;
        if (this.done) return;
      }
    }

    var g = groupList[groupTypeList[j]];
    var atomNameList = g.get("atomNameList");
    var len = atomNameList.length;
    if (this.skipping) {
      iatom += len;
      continue;
    }

    var a0 = iatom;
    if (insCodes != null) insCode = insCodes[j];
    seqNo = groupIdList[j];
    var group3 = g.get("groupName");
    var isHetero = this.vwr.getJBR().isHetero(group3);

    if (isHetero) {
      var hetName = "" + g.get("chemCompType");
      var isNewHetero = (this.htHetero == null || !this.htHetero.containsKey(group3));
      if (isNewHetero) {
        if (this.entities != null && hetName.equals("NON-POLYMER")) {
          // Prefer the description of the entity that lists this chain.
          findEntity:
          for (var ie = this.entities.length; --ie >= 0;) {
            var entity = this.entities[ie];
            var chainList = entity.get("chainIndexList");
            for (var k = chainList.length; --k >= 0;) {
              if (chainList[k] == iChain) {
                hetName = "a component of the entity \"" + entity.get("description") + "\"";
                break findEntity;
              }
            }
          }
        }
        this.addHetero(group3, hetName, false, true);
      }
    }

    var elementList = g.get("elementList");
    var haveAtom = false;
    for (var ia = 0; ia < len; ia++, iatom++) {
      var a = new J.adapter.smarter.Atom();
      a.isHetero = isHetero;
      if (insCode.charCodeAt(0) != 0) a.insertionCode = insCode;
      this.setAtomCoordXYZ(a, x[iatom], y[iatom], z[iatom]);
      a.elementSymbol = elementList[ia];
      a.atomName = atomNameList[ia];
      if (seqNo >= 0) {
        a.sequenceNumber = seqNo;
        this.maxSerial = Math.max(this.maxSerial, seqNo);
      }
      a.group3 = group3;
      this.setChainID(a, chainID);
      if (bf != null) a.bfactor = bf[iatom];
      if (altloc != null) a.altLoc = altloc[iatom];
      if (occ != null) a.foccupancy = occ[iatom];
      if (haveSerial) a.atomSerial = atomId[iatom];

      if (!this.filterAtom(a, -1)) continue;
      if (!this.processSubclassAtom(a, labelAsym, authAsym)) continue;

      // The group index advances only for groups that contribute an atom.
      if (!haveAtom) {
        thisGroup++;
        haveAtom = true;
      }
      if (haveSerial) {
        this.asc.addAtomWithMappedSerialNumber(a);
      } else {
        this.asc.addAtom(a);
      }
      this.atomMap[iatom] = a;
      this.atomGroup[this.ac] = j;
      this.groupMap[j] = this.lastGroup = thisGroup;
      this.ac++;
    }

    if (!this.isCourseGrained) {
      var bo = g.get("bondOrderList");
      var bi = g.get("bondAtomList");
      this.addMMTFBonds(bo, bi, a0, doMulti, false);
    }
  }

  this.asc.setCurrentModelInfo("pdbID", this.$pdbID);
}, "~B");
|
| 183 |
+
Clazz.defineMethod (c$, "addMMTFBonds",
/**
 * Adds bonds described by a flat list of atom-index pairs.
 *
 * @param bo      bond orders, one per pair (may be null)
 * @param bi      flat array of atom indices, two entries per bond (may be null)
 * @param a0      offset added to every entry of bi before looking it up in this.atomMap
 * @param doMulti when true and bo is available, bo[n] is used as the order; otherwise 1
 * @param isInter when true (and debugging is on), each added bond is logged
 */
function (bo, bi, a0, doMulti, isInter) {
  if (bi == null) return;
  // Bond orders are only honored when they were requested AND actually supplied.
  var useOrders = (doMulti & (bo != null)) != 0;
  var nBonds = Clazz.doubleToInt (bi.length / 2);
  for (var n = 0; n < nBonds; n++) {
    var first = this.atomMap[bi[2 * n] + a0];
    var second = this.atomMap[bi[2 * n + 1] + a0];
    // Either end may be missing from atomMap; such pairs are skipped.
    if (first == null || second == null) continue;
    var order = (useOrders ? bo[n] : 1);
    var bond = new J.adapter.smarter.Bond (first.index, second.index, order);
    this.asc.addBond (bond);
    if (JU.Logger.debugging && isInter) {
      JU.Logger.info ("inter-group (" + (first.atomSetIndex + 1) + "." + first.index + "/" + (second.atomSetIndex + 1) + "." + second.index + ") bond " + first.group3 + first.sequenceNumber + "." + first.atomName + " - " + second.group3 + second.sequenceNumber + "." + second.atomName + " " + bond.order);
    }
  }
}, "~A,~A,~N,~B,~B");
|
| 197 |
+
Clazz.defineMethod (c$, "setMMTFSymmetry",
/**
 * Transfers the "spaceGroup" and "unitCell" entries of this.map to the reader.
 * The six unit cell items are set by index 0..5 when "unitCell" is present.
 */
function () {
  this.setSpaceGroupName (this.map.get ("spaceGroup"));
  var cell = this.map.get ("unitCell");
  if (cell == null) return;
  for (var item = 0; item < 6; item++) {
    this.setUnitCellItem (item, cell[item]);
  }
});
|
| 204 |
+
Clazz.defineMethod (c$, "getMMTFBioAssembly",
/**
 * Builds one info table per entry of the "bioAssemblyList" map item and
 * appends it to this.vBiomolecules (created on demand).
 *
 * For every transform of an assembly, a "$"-delimited chain list is added to
 * "assemblies", the transform matrix is registered via addMatrix under a new
 * operator id (taken from ++this.opCount), and that id is added to "operators".
 */
function () {
  var assemblies = this.map.get ("bioAssemblyList");
  if (assemblies == null) return;
  if (this.vBiomolecules == null) this.vBiomolecules = new JU.Lst ();
  // Entries are processed from last to first.
  for (var idx = assemblies.length - 1; idx >= 0; idx--) {
    var info = new java.util.Hashtable ();
    this.vBiomolecules.addLast (info);
    var molNumber = idx + 1;
    this.checkFilterAssembly ("" + molNumber, info);
    info.put ("name", "biomolecule " + molNumber);
    info.put ("molecule", Integer.$valueOf (molNumber));
    var chainSets = new JU.Lst ();
    var operatorIds = new JU.Lst ();
    info.put ("biomts", new JU.Lst ());
    info.put ("chains", new JU.Lst ());
    info.put ("assemblies", chainSets);
    info.put ("operators", operatorIds);
    var transforms = assemblies[idx].get ("transformList");
    var sb = new JU.SB ();
    var nTransforms = transforms.length;
    for (var t = 0; t < nTransforms; t++) {
      var transform = transforms[t];
      // The same buffer is reused for each transform.
      sb.setLength (0);
      var chainIndices = transform.get ("chainIndexList");
      var nChains = chainIndices.length;
      for (var c = 0; c < nChains; c++) {
        sb.append ("$").append (this.labelAsymList[chainIndices[c]]);
      }
      chainSets.addLast (sb.append ("$").toString ());
      var opId = "" + (++this.opCount);
      this.addMatrix (opId, JU.M4.newA16 (transform.get ("matrix")), false);
      operatorIds.addLast (opId);
    }
  }
});
|
| 238 |
+
Clazz.defineMethod (c$, "getStructure",
/**
 * Decodes "secStructList" and records, for every entry whose type code is
 * 0, 2, 3, 4 or 6, the mapped group in this.bsStructures[type] and
 * type + 1 in this.groupDSSP. If at least one such entry was found, a single
 * Structure carrying all the bit sets is added to the atom set collection.
 */
function () {
  var codes = this.decode ("secStructList");
  if (JU.Logger.debugging) JU.Logger.info (JU.PT.toJSON ("secStructList", codes));
  // Slots 1 and 5 are intentionally left null; only codes 0, 2, 3, 4, 6 are tracked.
  this.bsStructures = Clazz.newArray (-1, [ new JU.BS (), null, new JU.BS (), new JU.BS (), new JU.BS (), null, new JU.BS ()]);
  var lastFound = -1;
  for (var g = 0; g < codes.length; g++) {
    var code = codes[g];
    var tracked = (code === 0 || code === 2 || code === 3 || code === 4 || code === 6);
    if (!tracked) continue;
    var mapped = this.groupMap[g];
    this.bsStructures[code].set (mapped);
    this.groupDSSP[mapped] = code + 1;
    lastFound = g;
  }
  // Evaluated before the guard below, exactly as in the generated code.
  var modelIndex = (this.isDSSP1 ? this.asc.iSet : this.groupModels[lastFound]);
  if (lastFound < 0) return;
  this.haveStructure = true;
  this.asc.addStructure ( new J.adapter.smarter.Structure (modelIndex, null, null, null, 0, 0, this.bsStructures));
});
|
| 263 |
+
Clazz.defineMethod (c$, "addStructureSymmetry",
/**
 * Walks the atoms from index this.ac0 up to this.asc.ac and, for each atom
 * that is selected in this.asc.bsAtoms (or all atoms when that set is null),
 * sets the running group pointer in the bsStructures entry given by the
 * groupDSSP value of the atom's source group.
 */
function () {
  if (this.asc.ac == 0) return;
  var allAtoms = this.asc.atoms;
  var selected = this.asc.bsAtoms;
  // Running group pointer continues from the last group assigned so far.
  var groupPtr = this.lastGroup;
  var currentGroup = -1;
  var end = this.asc.ac;
  for (var i = this.ac0; i < end; i++) {
    if (selected != null && !selected.get (i)) continue;
    var sourceGroup = this.atomGroup[allAtoms[i].atomSite];
    if (sourceGroup != currentGroup) {
      // A change of source group starts a new group.
      currentGroup = sourceGroup;
      groupPtr++;
    }
    var dssp = this.groupDSSP[sourceGroup];
    if (dssp > 0) this.bsStructures[dssp - 1].set (groupPtr);
  }
});
|
| 282 |
+
Clazz.defineMethod (c$, "decode",
/**
 * Looks up key in this.map and passes the value through
 * JU.MessagePackReader.decode.
 *
 * @param key name of the map entry to decode
 * @return whatever JU.MessagePackReader.decode returns for that entry
 */
function (key) {
  var raw = this.map.get (key);
  return JU.MessagePackReader.decode (raw);
}, "~S");
|
| 286 |
+
});
|
static/j2s/J/adapter/readers/cif/MSCifParser.js
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["J.adapter.readers.cif.MSRdr"], "J.adapter.readers.cif.MSCifParser", ["java.lang.Character", "$.Double", "JU.M3", "$.Matrix", "$.PT", "J.adapter.readers.cif.Cif2DataParser"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
  // Instance field defaults for MSCifParser (declared as extending MSRdr below).
  this.comSSMat = null;  // created lazily by processEntry
  this.field = null;     // most recent column data read by fieldProperty
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.cif, "MSCifParser", J.adapter.readers.cif.MSRdr);
|
| 8 |
+
Clazz.makeConstructor (c$,
/**
 * No-argument constructor; only invokes the superclass constructor.
 */
function () {
  Clazz.superConstructor (this, J.adapter.readers.cif.MSCifParser, []);
});
|
| 12 |
+
Clazz.defineMethod (c$, "processEntry",
/**
 * Handles a single (non-loop) CIF entry of the current reader.
 *
 * "_cell_commen_t_section_1" stores its integer value in
 * this.commensurateSection1; keys starting with
 * "_cell_commen_supercell_matrix" store their float value in this.comSSMat
 * at the (row, column) given by the last two "_"-separated tokens of the key.
 * Either key marks the structure as commensurate.
 */
function () {
  var reader = this.cr;
  if (reader.key.equals ("_cell_commen_t_section_1")) {
    this.isCommensurate = true;
    this.commensurateSection1 = reader.parseIntStr (reader.data);
  }
  if (!reader.key.startsWith ("_cell_commen_supercell_matrix")) return;
  this.isCommensurate = true;
  if (this.comSSMat == null) this.comSSMat = JU.M3.newM3 (null);
  var parts = JU.PT.split (reader.key, "_");
  var row = reader.parseIntStr (parts[parts.length - 2]);
  var col = reader.parseIntStr (parts[parts.length - 1]);
  // Key indices are 1-based; anything that did not parse to a positive value is ignored.
  if (row > 0 && col > 0) {
    this.comSSMat.setElement (row - 1, col - 1, reader.parseFloatStr (reader.data));
  }
});
|
| 26 |
+
Clazz.defineMethod (c$, "processLoopBlock",
/**
 * Processes one CIF loop block that may carry modulation data.
 *
 * Returns the result of processSubsystemLoopBlock for "_cell_subsystem_code",
 * 0 when the loop key is not one this parser handles, and 1 after all rows
 * of a handled loop have been read. Each accepted row results in one or
 * more addMod calls.
 *
 * The numeric case labels are the column-key constants declared in this
 * class's static table (e.g. 0 = FWV_ID, 1 = WV_ID); the names are given in
 * the comments next to each case.
 */
function () {
  var reader = this.cr;
  var loopKey = reader.key;
  if (loopKey.equals ("_cell_subsystem_code")) return this.processSubsystemLoopBlock ();
  var handled = loopKey.startsWith ("_cell_wave") || loopKey.contains ("fourier") || loopKey.contains ("legendre") || loopKey.contains ("_special_func");
  if (!handled) {
    if (loopKey.contains ("crenel_ortho")) {
      reader.appendLoadNote ("WARNING: Orthogonalized non-Legendre functions not supported.\nThe following block has been ignored. Use Legendre functions instead.\n\n" + reader.parser.skipLoop (true) + "=================================\n");
    }
    return 0;
  }
  if (reader.asc.iSet < 0) reader.asc.newAtomSet ();
  reader.parseLoopParametersFor ("_atom_site", J.adapter.readers.cif.MSCifParser.modulationFields);

  // A value array is rejected when any entry is NaN or > 1e100, or when
  // every entry is zero.
  var isUsable = function (values) {
    for (var k = 0, zerosLeft = values.length; k < values.length; k++) {
      if (Double.isNaN (values[k]) || values[k] > 1e100 || values[k] == 0 && --zerosLeft == 0) return false;
    }
    return true;
  };
  var axisNames = ["x", "y", "z"];

  var tok;
  while (reader.parser.getData ()) {
    var skipRow = false;
    var typeId = null;
    var atomLabel = null;
    var axis = null;
    var params = Clazz.newDoubleArray (-1, [NaN, NaN, NaN]);
    var qCoefs = null;
    var center = NaN;
    var width = NaN;
    var fid = null;
    var nCols = reader.parser.getColumnCount ();
    for (var col = 0; col < nCols; ++col) {
      switch (tok = this.fieldProperty (reader, col)) {
      case 1: // WV_ID
        reader.haveCellWaveVector = true;
        // fall through
      case 0: // FWV_ID
      case 41: // FD_ID
      case 42: // FO_ID
      case 43: // FU_ID
        params[0] = params[1] = params[2] = 0;
        // fall through
      case 14: // FWV_DISP_SEQ_ID
      case 26: // FWV_OCC_SEQ_ID
      case 51: // FWV_SPIN_SEQ_ID
      case 36: // FWV_U_SEQ_ID
      case 44: // FDP_ID
      case 45: // FOP_ID
      case 46: // FUP_ID
        switch (tok) {
        case 1:
          typeId = "W_";
          break;
        case 0:
          typeId = "F_";
          fid = this.field;
          break;
        case 41:
        case 42:
        case 43:
          fid = "?" + this.field;
          params[2] = 1;
          // Next column; the type id is not touched for these keys.
          continue;
        case 44:
        case 45:
        case 46:
          atomLabel = axis = "*";
          // fall through
        case 14:
        case 26:
        case 51:
        case 36:
          // Type letter is taken from character 11 of the field name.
          typeId = Character.toUpperCase (J.adapter.readers.cif.MSCifParser.modulationFields[tok].charAt (11)) + "_";
          break;
        }
        typeId += this.field;
        break;
      case 47: // JANA_OCC_ABS_LABEL
        typeId = "J_O";
        params[0] = params[2] = 1;
        axis = "0";
        atomLabel = this.field;
        break;
      case 31: // OCC_SPECIAL_LABEL
        typeId = "O_0";
        axis = "0";
        atomLabel = this.field;
        break;
      case 19: // DISP_SPEC_LABEL
        typeId = "D_S";
        axis = "0";
        atomLabel = this.field;
        break;
      case 56: // SPIN_SPEC_LABEL
        typeId = "M_T";
        axis = "0";
        atomLabel = this.field;
        break;
      case 62: // LEG_DISP_LABEL
        typeId = "D_L";
        atomLabel = this.field;
        break;
      case 66: // LEG_U_LABEL
        typeId = "U_L";
        atomLabel = this.field;
        break;
      case 70: // LEG_OCC_LABEL
        typeId = "O_L";
        atomLabel = this.field;
        break;
      case 12: // FWV_DISP_LABEL
      case 25: // FWV_OCC_LABEL
      case 49: // FWV_SPIN_LABEL
      case 34: // FWV_U_LABEL
        atomLabel = this.field;
        break;
      case 13: // FWV_DISP_AXIS
      case 50: // FWV_SPIN_AXIS
      case 63: // LEG_DISP_AXIS
        axis = this.field;
        // Rows for axes not listed in the MODAXES filter are dropped.
        if (this.modAxes != null && this.modAxes.indexOf (axis.toUpperCase ()) < 0) skipRow = true;
        break;
      case 67: // LEG_U_TENS
      case 35: // FWV_U_TENS
        axis = this.field.toUpperCase ();
        break;
      case 8: // FWV_Q_COEF
        qCoefs = J.adapter.readers.cif.Cif2DataParser.getArrayFromStringList (this.field, this.modDim);
        break;
      default:
        // Everything else (including unrecognized columns) is parsed as a number.
        var value = reader.parseFloatStr (this.field);
        switch (tok) {
        case 65: // LEG_DISP_COEF
        case 72: // LEG_OCC_COEF
        case 69: // LEG_U_COEF
          params[0] = value;
          if (value != 0) params[2] = 0;
          break;
        case 28: // FWV_OCC_SIN
        case 32: // OCC_CRENEL_C
        case 16: // FWV_DISP_SIN
        case 53: // FWV_SPIN_SIN
        case 38: // FWV_U_SIN
        case 78: // DEPR_FU_SIN
        case 74: // DEPR_FD_SIN
        case 76: // DEPR_FO_SIN
          params[2] = 0;
          // fall through
        case 2: // WV_X
        case 5: // FWV_X
        case 20: // DISP_SAW_AX
        case 57: // SPIN_SAW_AX
          params[0] = value;
          break;
        case 9: // JANA_FWV_Q1_COEF
        case 10: // JANA_FWV_Q2_COEF
        case 11: // JANA_FWV_Q3_COEF
          if (qCoefs == null) qCoefs = Clazz.newDoubleArray (this.modDim, 0);
          qCoefs[tok - 9] = value;
          break;
        case 17: // FWV_DISP_MODULUS
        case 29: // FWV_OCC_MODULUS
        case 54: // FWV_SPIN_MODULUS
        case 39: // FWV_U_MODULUS
          params[0] = value;
          params[2] = 1;
          break;
        case 71: // LEG_OCC_ORDER
        case 75: // DEPR_FO_COS
        case 27: // FWV_OCC_COS
          axis = "0";
          // fall through
        case 64: // LEG_DISP_ORDER
        case 68: // LEG_U_ORDER
        case 3: // WV_Y
        case 6: // FWV_Y
        case 18: // FWV_DISP_PHASE
        case 30: // FWV_OCC_PHASE
        case 55: // FWV_SPIN_PHASE
        case 40: // FWV_U_PHASE
        case 33: // OCC_CRENEL_W
        case 21: // DISP_SAW_AY
        case 58: // SPIN_SAW_AY
        case 48: // JANA_OCC_ABS_O_0
        case 15: // FWV_DISP_COS
        case 52: // FWV_SPIN_COS
        case 37: // FWV_U_COS
        case 73: // DEPR_FD_COS
        case 77: // DEPR_FU_COS
          params[1] = value;
          break;
        case 4: // WV_Z
        case 7: // FWV_Z
        case 22: // DISP_SAW_AZ
        case 59: // SPIN_SAW_AZ
          params[2] = value;
          break;
        case 23: // DISP_SAW_C
        case 60: // SPIN_SAW_C
          center = value;
          break;
        case 24: // DISP_SAW_W
        case 61: // SPIN_SAW_W
          width = value;
          break;
        }
        break;
      }
    }

    // Row-level rejection: axis filtered out, nothing recognized, or an
    // explicitly named atom that the reader rejects.
    if (skipRow || typeId == null && qCoefs == null || atomLabel != null && !atomLabel.equals ("*") && reader.rejectAtomName (atomLabel)) continue;
    if (qCoefs != null) {
      if (!isUsable (qCoefs)) continue;
      this.addMod ("F_coefs_", fid, qCoefs);
      // Poisons params so the check below ends this row.
      params[0] = NaN;
    }
    if (!isUsable (params)) continue;

    switch (typeId.charAt (0)) {
    case 'C':
    case 'D':
    case 'O':
    case 'M':
    case 'U':
    case 'J':
      // These types are per-atom and per-axis; both must be known.
      if (atomLabel == null || axis == null) continue;
      if (typeId.equals ("D_S") || typeId.equals ("M_T")) {
        if (Double.isNaN (center) || Double.isNaN (width)) continue;
        // One entry per nonzero component, each carrying [c, w, amplitude].
        for (var a = 0; a < 3; a++) {
          if (params[a] != 0) this.addMod (typeId + "#" + axisNames[a] + ";" + atomLabel, fid, Clazz.newDoubleArray (-1, [center, width, params[a]]));
        }
        continue;
      }
      if (typeId.indexOf ("_L") == 1) {
        // For "?_L" ids, params[1] is appended to the id ("U...") or to the axis (others).
        if (typeId.startsWith ("U")) typeId += Clazz.doubleToInt (params[1]);
        else axis += Clazz.doubleToInt (params[1]);
      }
      typeId += "#" + axis + ";" + atomLabel;
      break;
    }
    this.addMod (typeId, fid, params);
  }
  return 1;
});
|
| 265 |
+
Clazz.defineMethod (c$, "addMod",
/**
 * Registers a modulation via addModulation, using the default map (null)
 * and model (-1).
 *
 * @param id     modulation id
 * @param fid    optional suffix appended to id when not null
 * @param params numeric parameters of the modulation
 */
function (id, fid, params) {
  var fullId = id;
  if (fid != null) fullId += fid;
  this.addModulation (null, fullId, params, -1);
}, "~S,~S,~A");
|
| 270 |
+
Clazz.defineMethod (c$, "processSubsystemLoopBlock",
/**
 * Reads every row of the current loop and adds one subsystem per row. The
 * subsystem code is the data of column 0, and the matrix is collected from
 * the "_w_" columns (starting at column 1) with dimension 3 + this.modDim.
 *
 * @return always 1
 */
function () {
  var reader = this.cr;
  reader.parseLoopParameters (null);
  var dim = 3 + this.modDim;
  while (reader.parser.getData ()) {
    // fieldProperty is called for its side effect of loading this.field.
    this.fieldProperty (reader, 0);
    // Capture the code now; getSparseMatrix overwrites this.field.
    var code = this.field;
    var w = this.getSparseMatrix (reader, "_w_", 1, dim);
    this.addSubsystem (code, w);
  }
  return 1;
});
|
| 281 |
+
Clazz.defineMethod (c$, "getSparseMatrix",
/**
 * Builds a dim x dim JU.Matrix from the columns of the current loop row
 * whose names contain term. The target cell is taken from the last two
 * "_"-separated tokens of the column name, read as 1-based row and column.
 *
 * @param cr   the CIF reader
 * @param term substring a column name must contain to be used
 * @param i    first column to examine
 * @param dim  matrix dimension
 * @return the populated matrix
 */
function (cr, term, i, dim) {
  var matrix = new JU.Matrix (null, dim, dim);
  var cells = matrix.getArray ();
  var nCols = cr.parser.getColumnCount ();
  for (var col = i; col < nCols; ++col) {
    var prop = this.fieldProperty (cr, col);
    if (prop < 0) continue;
    var name = cr.parser.getColumnName (prop);
    if (!name.contains (term)) continue;
    var parts = JU.PT.split (name, "_");
    var row = cr.parseIntStr (parts[parts.length - 2]);
    var column = cr.parseIntStr (parts[parts.length - 1]);
    // Only positive (successfully parsed) indices are stored.
    if (row > 0 && column > 0) cells[row - 1][column - 1] = cr.parseFloatStr (this.field);
  }
  return matrix;
}, "J.adapter.readers.cif.CifReader,~S,~N,~N");
|
| 297 |
+
Clazz.defineMethod (c$, "fieldProperty",
/**
 * Loads the data of column i into this.field and returns the key index the
 * reader associates with that column (cr.col2key[i]).
 *
 * @param cr the CIF reader
 * @param i  column index
 * @return cr.col2key[i], or -1 when the data is empty or starts with '\0'
 */
function (cr, i) {
  var data = cr.parser.getColumnData (i);
  this.field = data;
  if (data.length > 0 && data.charAt (0) != '\0') return cr.col2key[i];
  return -1;
}, "J.adapter.readers.cif.CifReader,~N");
|
| 301 |
+
// Column-key constants. Each value is the index of the matching field name
// in "modulationFields" below; NONE (-1) is what fieldProperty returns for
// an empty column.
Clazz.defineStatics (c$,
  // wave vectors
  "FWV_ID", 0,
  "WV_ID", 1,
  "WV_X", 2,
  "WV_Y", 3,
  "WV_Z", 4,
  "FWV_X", 5,
  "FWV_Y", 6,
  "FWV_Z", 7,
  "FWV_Q_COEF", 8,
  "JANA_FWV_Q1_COEF", 9,
  "JANA_FWV_Q2_COEF", 10,
  "JANA_FWV_Q3_COEF", 11,
  // displacement, Fourier
  "FWV_DISP_LABEL", 12,
  "FWV_DISP_AXIS", 13,
  "FWV_DISP_SEQ_ID", 14,
  "FWV_DISP_COS", 15,
  "FWV_DISP_SIN", 16,
  "FWV_DISP_MODULUS", 17,
  "FWV_DISP_PHASE", 18,
  // displacement, special function (sawtooth)
  "DISP_SPEC_LABEL", 19,
  "DISP_SAW_AX", 20,
  "DISP_SAW_AY", 21,
  "DISP_SAW_AZ", 22,
  "DISP_SAW_C", 23,
  "DISP_SAW_W", 24,
  // occupancy, Fourier
  "FWV_OCC_LABEL", 25,
  "FWV_OCC_SEQ_ID", 26,
  "FWV_OCC_COS", 27,
  "FWV_OCC_SIN", 28,
  "FWV_OCC_MODULUS", 29,
  "FWV_OCC_PHASE", 30,
  // occupancy, special function (crenel)
  "OCC_SPECIAL_LABEL", 31,
  "OCC_CRENEL_C", 32,
  "OCC_CRENEL_W", 33,
  // U, Fourier
  "FWV_U_LABEL", 34,
  "FWV_U_TENS", 35,
  "FWV_U_SEQ_ID", 36,
  "FWV_U_COS", 37,
  "FWV_U_SIN", 38,
  "FWV_U_MODULUS", 39,
  "FWV_U_PHASE", 40,
  // ids
  "FD_ID", 41,
  "FO_ID", 42,
  "FU_ID", 43,
  "FDP_ID", 44,
  "FOP_ID", 45,
  "FUP_ID", 46,
  // absolute occupancy
  "JANA_OCC_ABS_LABEL", 47,
  "JANA_OCC_ABS_O_0", 48,
  // moment (spin), Fourier
  "FWV_SPIN_LABEL", 49,
  "FWV_SPIN_AXIS", 50,
  "FWV_SPIN_SEQ_ID", 51,
  "FWV_SPIN_COS", 52,
  "FWV_SPIN_SIN", 53,
  "FWV_SPIN_MODULUS", 54,
  "FWV_SPIN_PHASE", 55,
  // moment (spin), special function (sawtooth)
  "SPIN_SPEC_LABEL", 56,
  "SPIN_SAW_AX", 57,
  "SPIN_SAW_AY", 58,
  "SPIN_SAW_AZ", 59,
  "SPIN_SAW_C", 60,
  "SPIN_SAW_W", 61,
  // Legendre
  "LEG_DISP_LABEL", 62,
  "LEG_DISP_AXIS", 63,
  "LEG_DISP_ORDER", 64,
  "LEG_DISP_COEF", 65,
  "LEG_U_LABEL", 66,
  "LEG_U_TENS", 67,
  "LEG_U_ORDER", 68,
  "LEG_U_COEF", 69,
  "LEG_OCC_LABEL", 70,
  "LEG_OCC_ORDER", 71,
  "LEG_OCC_COEF", 72,
  // DEPR_* cos/sin names (no "_param_" in the field name)
  "DEPR_FD_COS", 73,
  "DEPR_FD_SIN", 74,
  "DEPR_FO_COS", 75,
  "DEPR_FO_SIN", 76,
  "DEPR_FU_COS", 77,
  "DEPR_FU_SIN", 78,
  "modulationFields", Clazz.newArray (-1, [
    "*_fourier_wave_vector_seq_id",
    "_cell_wave_vector_seq_id",
    "_cell_wave_vector_x",
    "_cell_wave_vector_y",
    "_cell_wave_vector_z",
    "*_fourier_wave_vector_x",
    "*_fourier_wave_vector_y",
    "*_fourier_wave_vector_z",
    "*_fourier_wave_vector_q_coeff",
    "*_fourier_wave_vector_q1_coeff",
    "*_fourier_wave_vector_q2_coeff",
    "*_fourier_wave_vector_q3_coeff",
    "*_displace_fourier_atom_site_label",
    "*_displace_fourier_axis",
    "*_displace_fourier_wave_vector_seq_id",
    "*_displace_fourier_param_cos",
    "*_displace_fourier_param_sin",
    "*_displace_fourier_param_modulus",
    "*_displace_fourier_param_phase",
    "*_displace_special_func_atom_site_label",
    "*_displace_special_func_sawtooth_ax",
    "*_displace_special_func_sawtooth_ay",
    "*_displace_special_func_sawtooth_az",
    "*_displace_special_func_sawtooth_c",
    "*_displace_special_func_sawtooth_w",
    "*_occ_fourier_atom_site_label",
    "*_occ_fourier_wave_vector_seq_id",
    "*_occ_fourier_param_cos",
    "*_occ_fourier_param_sin",
    "*_occ_fourier_param_modulus",
    "*_occ_fourier_param_phase",
    "*_occ_special_func_atom_site_label",
    "*_occ_special_func_crenel_c",
    "*_occ_special_func_crenel_w",
    "*_u_fourier_atom_site_label",
    "*_u_fourier_tens_elem",
    "*_u_fourier_wave_vector_seq_id",
    "*_u_fourier_param_cos",
    "*_u_fourier_param_sin",
    "*_u_fourier_param_modulus",
    "*_u_fourier_param_phase",
    "*_displace_fourier_id",
    "*_occ_fourier_id",
    "*_u_fourier_id",
    "*_displace_fourier_param_id",
    "*_occ_fourier_param_id",
    "*_u_fourier_param_id",
    "*_occ_fourier_absolute_site_label",
    "*_occ_fourier_absolute",
    "*_moment_fourier_atom_site_label",
    "*_moment_fourier_axis",
    "*_moment_fourier_wave_vector_seq_id",
    "*_moment_fourier_param_cos",
    "*_moment_fourier_param_sin",
    "*_moment_fourier_param_modulus",
    "*_moment_fourier_param_phase",
    "*_moment_special_func_atom_site_label",
    "*_moment_special_func_sawtooth_ax",
    "*_moment_special_func_sawtooth_ay",
    "*_moment_special_func_sawtooth_az",
    "*_moment_special_func_sawtooth_c",
    "*_moment_special_func_sawtooth_w",
    "*_displace_legendre_atom_site_label",
    "*_displace_legendre_axis",
    "*_displace_legendre_param_order",
    "*_displace_legendre_param_coeff",
    "*_u_legendre_atom_site_label",
    "*_u_legendre_tens_elem",
    "*_u_legendre_param_order",
    "*_u_legendre_param_coeff",
    "*_occ_legendre_atom_site_label",
    "*_occ_legendre_param_order",
    "*_occ_legendre_param_coeff",
    "*_displace_fourier_cos",
    "*_displace_fourier_sin",
    "*_occ_fourier_cos",
    "*_occ_fourier_sin",
    "*_u_fourier_cos",
    "*_u_fourier_sin"
  ]),
  "NONE", -1);
|
| 383 |
+
});
|
static/j2s/J/adapter/readers/cif/MSRdr.js
ADDED
|
@@ -0,0 +1,593 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.cif");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.MSInterface"], "J.adapter.readers.cif.MSRdr", ["java.lang.Boolean", "$.Exception", "$.Float", "java.util.Hashtable", "JU.Lst", "$.M3", "$.Matrix", "$.P3", "$.PT", "J.adapter.readers.cif.Subsystem", "J.adapter.smarter.AtomSetCollectionReader", "JU.BSUtil", "$.BoxInfo", "$.Escape", "$.Logger", "$.Modulation", "$.ModulationSet", "$.Vibration"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
  // Instance field defaults for MSRdr. The mod* option fields in the first
  // group are assigned from reader filters in initialize().
  this.cr = null;
  this.modDim = 0;
  this.modAxes = null;
  this.modAverage = false;
  this.isCommensurate = false;
  this.commensurateSection1 = 0;
  this.modPack = false;
  this.modVib = false;
  this.modType = null;
  this.modCell = null;
  this.modDebug = false;
  this.modSelected = -1;
  this.modLast = false;

  this.sigma = null;
  this.htModulation = null;   // id -> parameter array, filled by addModulation
  this.htAtomMods = null;
  this.iopLast = -1;
  this.gammaE = null;
  this.nOps = 0;
  this.haveOccupancy = false;
  this.atoms = null;
  this.ac = 0;
  this.haveAtomMods = false;
  this.modCoord = false;
  this.finalized = false;
  this.symmetry = null;
  this.supercellSymmetry = null;
  this.legendres = null;      // ids starting with "D_L#" or "U_L", filled by addModulation
  this.atModel = "@0";
  this.modMatrices = null;
  this.qlist100 = null;
  this.qs = null;
  this.modCount = 0;
  this.modTUV = null;
  this.htSubsystems = null;
  this.minXYZ0 = null;
  this.maxXYZ0 = null;
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.cif, "MSRdr", null, J.adapter.smarter.MSInterface);
|
| 43 |
+
Clazz.defineMethod (c$, "getSigma",
/**
 * @return the current value of this.sigma (null until something assigns it)
 */
function () {
  var current = this.sigma;
  return current;
});
|
| 47 |
+
Clazz.makeConstructor (c$,
/**
 * No-argument constructor with an empty body; field defaults come from the
 * class declaration and options are applied later by initialize().
 */
function () {
});
|
| 50 |
+
Clazz.overrideMethod (c$, "initialize",
/**
 * Binds this modulation reader to its owning reader and loads the MOD*
 * filter options from it, then sets the modulation dimension.
 *
 * @param r      the owning AtomSetCollectionReader
 * @param modDim the modulation dimension
 * @return modDim, unchanged
 */
function (r, modDim) {
  this.cr = r;
  // Flag-style options.
  this.modCoord = r.checkFilterKey ("MODCOORD");
  this.modDebug = r.checkFilterKey ("MODDEBUG");
  // Packing is on unless MODNOPACK is present.
  this.modPack = !r.checkFilterKey ("MODNOPACK");
  this.modLast = r.checkFilterKey ("MODLAST");
  // Value-style options.
  this.modAxes = r.getFilter ("MODAXES=");
  this.modType = r.getFilter ("MODTYPE=");
  this.modCell = r.getFilter ("MODCELL=");
  this.modSelected = r.parseIntStr ("" + r.getFilter ("MOD="));
  this.modVib = r.checkFilterKey ("MODVIB");
  this.modAverage = r.checkFilterKey ("MODAVE");
  // MODT= takes precedence; MODTUV= is consulted only when MODT= is absent.
  var tuvText = r.getFilter ("MODT=");
  if (tuvText == null) tuvText = r.getFilter ("MODTUV=");
  if (tuvText != null) {
    this.modTUV = new JU.P3 ();
    // Strip braces/parentheses and pad with zeros so three tokens always exist.
    var padded = JU.PT.replaceAllCharacters (tuvText, "{}()", "") + ",0,0,0";
    var parts = padded.$plit (",");
    this.modTUV.x = JU.PT.parseFloatFraction (parts[0]);
    this.modTUV.y = JU.PT.parseFloatFraction (parts[1]);
    this.modTUV.z = JU.PT.parseFloatFraction (parts[2]);
    // A NaN squared length means at least one component failed to parse.
    if (Float.isNaN (this.modTUV.lengthSquared ())) {
      JU.Logger.error ("MSRdr cannot read modTUV=" + tuvText);
      this.modTUV = null;
    }
  }
  this.setModDim (modDim);
  return modDim;
}, "J.adapter.smarter.AtomSetCollectionReader,~N");
|
| 76 |
+
Clazz.defineMethod (c$, "setSubsystemOptions",
/**
 * Passes the packing option to the reader's forceSymmetry and, when a
 * MODCELL value was given, queues a "unitcell {%...}" script on the reader.
 */
function () {
  var reader = this.cr;
  reader.forceSymmetry (this.modPack);
  if (this.modCell == null) return;
  reader.addJmolScript ("unitcell {%" + this.modCell + "}");
});
|
| 81 |
+
// Records the modulation dimension and starts a fresh, empty modulation
// table; the dimension is also appended to the reader's load notes.
//
//   ndim - modulation dimension
Clazz.defineMethod(c$, "setModDim", function (ndim) {
  var table = new java.util.Hashtable();
  this.htModulation = table;
  this.modDim = ndim;
  this.cr.appendLoadNote("Modulation dimension = " + this.modDim);
}, "~N");
|
| 87 |
+
// Stores one modulation parameter array under a key.
//
//   map    - destination table; null means this.htModulation
//   id     - modulation key; "@<model>" is appended when it has no "@" yet
//   pt     - parameter array (may be zeroed in place, see below)
//   iModel - model index for the suffix; a negative value means "use the
//            reader's current atom-set index"
//
// Nothing is stored when the entry is filtered out by MODTYPE=/MOD= or when
// every examined component of pt is zero.
Clazz.overrideMethod(c$, "addModulation", function (map, id, pt, iModel) {
  var ch = id.charAt(0);
  var isAtomMod = (ch === 'O' || ch === 'D' || ch === 'M' || ch === 'U');
  if (isAtomMod) {
    var typeExcluded = (this.modType != null && this.modType.indexOf(ch) < 0);
    var selectionExcluded = (this.modSelected > 0 && this.modSelected != 1);
    if (typeExcluded || selectionExcluded) return;
  }

  // Scan from the last component backwards. For "_coefs_" entries with a MOD=
  // selection, components other than the selected one are zeroed; the scan
  // stops at the first remaining nonzero component.
  var hasNonZero = false;
  var i = pt.length;
  while (--i >= 0) {
    if (this.modSelected > 0 && i + 1 != this.modSelected && id.contains("_coefs_")) {
      pt[i] = 0;
      continue;
    }
    if (pt[i] != 0) {
      hasNonZero = true;
      break;
    }
  }
  if (!hasNonZero) return;

  if (map == null) map = this.htModulation;
  if (id.indexOf("@") < 0) {
    id += "@" + (iModel >= 0 ? iModel : this.cr.asc.iSet);
  }

  // Legendre-type keys are remembered so fixLegendre() can post-process them.
  if (id.startsWith("D_L#") || id.startsWith("U_L")) {
    if (this.legendres == null) this.legendres = new JU.Lst();
    this.legendres.addLast(id);
  }

  JU.Logger.info("Adding " + id + " " + JU.Escape.e(pt));
  map.put(id, pt);
}, "java.util.Map,~S,~A,~N");
|
| 115 |
+
// Entry point for applying modulation to the reader's current atom set.
//
//   isPost   - false: only initialize the modulation data for the model;
//              true: apply the modulations to the atoms
//              (see setModulationForStructure)
//   symmetry - symmetry object to use; stored in this.symmetry
//
// Does nothing when the modulation dimension is 0 or no modulation table
// exists. With the MODDEBUG filter the global Logger debug flags are raised
// for the duration of the call; they are restored to their previous values
// afterwards, also when an exception propagates (the original reset them to
// false unconditionally and left them raised on error).
Clazz.overrideMethod(c$, "setModulation", function (isPost, symmetry) {
  if (this.modDim == 0 || this.htModulation == null) return;

  var forceDebug = this.modDebug;
  var savedDebugging = JU.Logger.debugging;
  var savedDebuggingHigh = JU.Logger.debuggingHigh;
  if (forceDebug) JU.Logger.debugging = JU.Logger.debuggingHigh = true;
  try {
    this.cr.asc.setInfo("someModelsAreModulated", Boolean.TRUE);
    this.symmetry = symmetry;
    this.setModulationForStructure(this.cr.asc.iSet, isPost);
  } finally {
    if (forceDebug) {
      JU.Logger.debugging = savedDebugging;
      JU.Logger.debuggingHigh = savedDebuggingHigh;
    }
  }
}, "~B,J.api.SymmetryInterface");
|
| 124 |
+
// One-time wrap-up after reading. On the first call, and only when there is
// a modulation dimension and MODVIB was not requested, it publishes the
// "modulationOn" info entry (TRUE, or the MODT/MODTUV point when one was
// given) and queues the "set modulateOccupancy" script command. Every call
// marks the reader as finalized.
Clazz.overrideMethod(c$, "finalizeModulation", function () {
  var shouldPublish = !this.finalized && this.modDim > 0 && !this.modVib;
  if (shouldPublish) {
    var tuv = this.modTUV;
    var modulationOn;
    if (tuv == null) {
      modulationOn = Boolean.TRUE;
    } else {
      this.cr.appendLoadNote("modTUV=" + tuv);
      modulationOn = tuv;
    }
    this.cr.asc.setInfo("modulationOn", modulationOn);

    // Occupancy modulation is switched on only for incommensurate structures
    // that actually carry occupancy modulations.
    var modulateOccupancy = !!(this.haveOccupancy && !this.isCommensurate);
    this.cr.addJmolScript("set modulateOccupancy " + modulateOccupancy);
  }
  this.finalized = true;
});
|
| 132 |
+
// Decides whether a modulation-table key belongs to the current model and is
// of interest, and if so returns it with everything from the model marker
// (this.atModel) onwards removed.
//
//   key    - full table key
//   checkQ - when true, keys containing "?" are rejected as well
//
// Returns null for keys that lack the model marker, or contain "_pos#" or
// "*;*".
// NOTE(review): the marker is located with indexOf, so "@1" is also found in
// a key ending in "@10" - confirm whether files with 10+ models can reach
// this code.
Clazz.defineMethod(c$, "checkKey", function (key, checkQ) {
  var markerAt = key.indexOf(this.atModel);
  if (markerAt < 0) return null;
  if (key.indexOf("_pos#") >= 0 || key.indexOf("*;*") >= 0) return null;
  if (checkQ && key.indexOf("?") >= 0) return null;
  return key.substring(0, markerAt);
}, "~S,~B");
|
| 137 |
+
// Looks up a modulation entry for the current model: the model marker
// (this.atModel) is appended to key before querying this.htModulation.
Clazz.overrideMethod(c$, "getMod", function (key) {
  var fullKey = key + this.atModel;
  return this.htModulation.get(fullKey);
}, "~S");
|
| 141 |
+
// Exposes the modulation table itself (not a copy).
Clazz.overrideMethod(c$, "getModulationMap", function () {
  var table = this.htModulation;
  return table;
});
|
| 145 |
+
// Two-phase modulation of one model.
//
//   iModel - model index; sets the "@<iModel>" key suffix used by lookups
//   isPost - false: initialize the modulation data only (initModForStructure)
//            true:  mark the model as done and modulate its atoms
//
// A model whose "X_@<iModel>" marker is already in the table is skipped.
Clazz.defineMethod(c$, "setModulationForStructure", function (iModel, isPost) {
  this.atModel = "@" + iModel;
  if (this.htModulation.containsKey("X_" + this.atModel)) return;

  if (!isPost) {
    this.initModForStructure(iModel);
    return;
  }

  // Marker entry: this model has been processed.
  this.htModulation.put("X_" + this.atModel, Clazz.newDoubleArray(0, 0));
  this.cr.appendLoadNote(this.modCount + " modulations for " + this.ac + " modulated atoms");
  if (!this.haveAtomMods) return;

  var atomCount = this.cr.asc.ac;
  this.atoms = this.cr.asc.atoms;
  if (this.symmetry != null) this.nOps = this.symmetry.getSpaceGroupOperationCount();

  // Keep a separate supercell symmetry only when it differs from the one
  // handed to setModulation().
  var xSymmetry = this.cr.asc.getXSymmetry().symmetry;
  this.supercellSymmetry = xSymmetry;
  if (this.supercellSymmetry === this.symmetry) this.supercellSymmetry = null;

  this.iopLast = -1;
  var first = this.cr.asc.getLastAtomSetAtomIndex();
  var i = first;
  while (i < atomCount) {
    this.modulateAtom(this.atoms[i]);
    i++;
  }

  // Release per-model state; trimming runs while subsystems are still known.
  this.htAtomMods = null;
  if (this.minXYZ0 != null) this.trimAtomSet();
  this.htSubsystems = null;
}, "~N,~B");
|
| 168 |
+
// Prepares all modulation data for one model (the "pre" phase of
// setModulationForStructure):
//   1. applies fixLegendre() when Legendre entries were collected;
//   2. builds this.sigma from the cell wave vectors W_1..W_d, giving up with
//      a load note when one is missing;
//   3. first pass over the table: normalizes amplitude/phase entries,
//      resolves "?" references, and derives "_coefs_" entries for wave
//      vectors via calculateQCoefs();
//   4. second pass: turns O/D/M/U entries into per-atom Modulation objects
//      (skipped when MODAVE is set).
//
//   iModel - model index passed through to addModulation()
//
// Throws Exception when an atom modulation has no matching q coefficients.
Clazz.defineMethod(c$, "initModForStructure", function (iModel) {
  if (this.legendres != null) this.fixLegendre();

  this.sigma = new JU.Matrix(null, this.modDim, 3);
  this.qs = null;
  this.modMatrices = Clazz.newArray(-1, [this.sigma, null]);

  var key;
  var pt;

  // --- cell wave vectors -> rows of sigma ---------------------------------
  for (var iq = 0; iq < this.modDim; iq++) {
    pt = this.getMod("W_" + (iq + 1));
    if (pt == null) {
      this.cr.appendLoadNote("NOTE!: Not enough cell wave vectors for d=" + this.modDim);
      return;
    }
    this.fixDouble(pt);
    this.cr.appendLoadNote("W_" + (iq + 1) + " = " + JU.Escape.e(pt));
    this.cr.appendUunitCellInfo("q" + (iq + 1) + "=" + pt[0] + " " + pt[1] + " " + pt[2]);
    this.sigma.getArray()[iq] = Clazz.newDoubleArray(-1, [pt[0], pt[1], pt[2]]);
  }

  // --- pass 1: normalize entries; new entries are collected in `added` -----
  // (they are merged afterwards so the table is not modified while iterated)
  var added = new java.util.Hashtable();
  var it = this.htModulation.entrySet().iterator();
  while (it.hasNext()) {
    var entry = it.next();
    key = this.checkKey(entry.getKey(), false);
    if (key == null) continue;
    pt = entry.getValue();
    switch (key.charAt(0)) {
    case 'O':
      this.haveOccupancy = true;
      // fall through
    case 'D':
    case 'M':
    case 'U':
      var fnId = key.charAt(2);
      if (pt[2] == 1 && fnId != 'S' && fnId != 'T' && fnId != 'L') {
        var qAt = key.indexOf("?");
        if (qAt >= 0) {
          // "?"-keyed entry: fetch the referenced "#*;*" entry and register
          // it under the key with the "?..." part removed.
          var ref = key.substring(qAt + 1);
          pt = this.getMod(key.substring(0, 2) + ref + "#*;*");
          if (pt != null) {
            key = key.substring(0, qAt);
            this.addModulation(added, key, pt, iModel);
          }
        } else {
          // (amplitude, phase) -> (a cos(2 pi phase), -a sin(2 pi phase)),
          // in place.
          var amp = pt[0];
          var angle = 2 * 3.141592653589793 * pt[1];
          pt[0] = (amp * Math.cos(angle));
          pt[1] = (amp * Math.sin(-angle));
          pt[2] = 0;
          JU.Logger.info("msCIF setting " + key + " " + JU.Escape.e(pt));
        }
      }
      break;
    case 'W':
      if (this.modDim > 1) {
        continue;
      }
      // fall through: for d = 1 a W entry is treated like an F entry
    case 'F':
      if (key.indexOf("_coefs_") >= 0) {
        this.cr.appendLoadNote("Wave vector " + key + "=" + JU.Escape.eAD(pt));
      } else {
        var harmonic = this.calculateQCoefs(pt);
        if (harmonic == null) {
          this.cr.appendLoadNote("Cannot match atom wave vector " + key + " " + JU.Escape.eAD(pt) + " to a cell wave vector or its harmonic");
        } else {
          var coefKey = key + "_coefs_";
          if (!this.htModulation.containsKey(coefKey + this.atModel)) {
            this.addModulation(added, coefKey, harmonic, iModel);
            if (key.startsWith("F_")) this.cr.appendLoadNote("atom wave vector " + key + " = " + JU.Escape.e(pt) + " fn = " + JU.Escape.e(harmonic));
          }
        }
      }
      break;
    }
  }
  if (!added.isEmpty()) this.htModulation.putAll(added);

  if (this.htSubsystems == null) {
    this.haveAtomMods = false;
  } else {
    this.cr.strSupercell = null;
    this.haveAtomMods = true;
    this.htAtomMods = new java.util.Hashtable();
  }

  // --- pass 2: build per-atom Modulation objects ---------------------------
  it = this.htModulation.entrySet().iterator();
  while (it.hasNext()) {
    var modEntry = it.next();
    key = this.checkKey(modEntry.getKey(), true);
    if (key == null) continue;

    var params = modEntry.getValue();
    var atomName = key.substring(key.indexOf(";") + 1);
    var mark = atomName.indexOf("#=");
    if (mark >= 0) {
      // "name#=other": take the parameters from the entry named after "#=".
      params = this.getMod(atomName.substring(mark + 2));
      atomName = atomName.substring(0, mark);
    }
    if (JU.Logger.debuggingHigh) JU.Logger.debug("SetModulation: " + key + " " + JU.Escape.e(params));

    var type = key.charAt(0);
    mark = key.indexOf("#") + 1;
    var utens = null;
    switch (type) {
    case 'U':
      utens = key.substring(mark, key.indexOf(";"));
      // fall through
    case 'O':
    case 'D':
    case 'M':
      if (this.modAverage) break;
      var axis = key.charAt(mark);
      type = this.getModType(key);
      if (this.htAtomMods == null) this.htAtomMods = new java.util.Hashtable();
      // Private copy of the parameters for the Modulation object.
      var copy = Clazz.newDoubleArray(params.length, 0);
      for (var ip = copy.length; --ip >= 0; ) copy[ip] = params[ip];
      var qcoefs = this.getQCoefs(key);
      if (qcoefs == null) throw new Exception("Missing cell wave vector for atom wave vector for " + key + " " + JU.Escape.e(params));
      this.addAtomModulation(atomName, axis, type, copy, utens, qcoefs);
      this.haveAtomMods = true;
      break;
    }
  }
}, "~N");
|
| 268 |
+
// Post-processes the Legendre entries collected by addModulation(): each is
// combined with the "O_0#0" entry of the same atom into a four-element array
// [o0, o1, l0, l1]. When that entry is missing, an error is logged and the
// Legendre entry's third element is set to NaN.
Clazz.defineMethod(c$, "fixLegendre", function () {
  var table = this.htModulation;
  var i = this.legendres.size();
  while (--i >= 0) {
    var key = this.legendres.get(i);
    var legendre = table.get(key);
    if (legendre == null) continue;

    // Same atom part (from ";" on), but the occupancy "0#0" entry.
    var crenelKey = "O_0#0" + key.substring(key.indexOf(";"));
    var crenel = table.get(crenelKey);
    if (crenel != null) {
      table.put(key, Clazz.newDoubleArray(-1, [crenel[0], crenel[1], legendre[0], legendre[1]]));
      continue;
    }
    JU.Logger.error("Crenel " + crenelKey + " not found for legendre modulation " + key);
    legendre[2] = NaN;
  }
});
|
| 283 |
+
// When the reader's fixJavaFloat option is on, passes every element of pt
// through JU.PT.fixDouble(value, 100000.0), in place; otherwise a no-op.
Clazz.defineMethod(c$, "fixDouble", function (pt) {
  if (!this.cr.fixJavaFloat) return;
  var i = pt.length;
  while (--i >= 0) {
    pt[i] = JU.PT.fixDouble(pt[i], 100000.0);
  }
}, "~A");
|
| 288 |
+
// Returns the q-coefficient array for a modulation key.
//
// The integer parsed at position 2 of the key (negative values clamped to 0)
// selects the wave function. For 0 a cached array [1, 0, ...] of length
// modDim is returned; otherwise the "F_coefs_<n>" entry of the current model
// (which may be absent).
Clazz.overrideMethod(c$, "getQCoefs", function (key) {
  var fn = Math.max(0, this.cr.parseIntAt(key, 2));
  if (fn != 0) return this.getMod("F_coefs_" + fn);

  if (this.qlist100 == null) {
    var unit = Clazz.newDoubleArray(this.modDim, 0);
    this.qlist100 = unit;
    this.qlist100[0] = 1;
  }
  return this.qlist100;
}, "~S");
|
| 298 |
+
// Maps a modulation key to its one-character type code.
//
// The character at index 2 wins when it is one of:
//   'S' -> 's',  'T' -> 't',  'L' -> 'l' for D keys, otherwise 'L',
//   '0' -> 'c'
// Otherwise the first character decides: D -> 'f', O -> 'o', M -> 'm',
// U -> 'u'; anything else yields '?'.
Clazz.overrideMethod(c$, "getModType", function (key) {
  var type = key.charAt(0);
  switch (key.charAt(2)) {
  case 'S':
    return 's';
  case 'T':
    return 't';
  case 'L':
    return (type == 'D' ? 'l' : 'L');
  case '0':
    return 'c';
  }
  switch (type) {
  case 'D':
    return 'f';
  case 'O':
    return 'o';
  case 'M':
    return 'm';
  case 'U':
    return 'u';
  }
  return '?';
}, "~S");
|
| 304 |
+
// Tries to express an atom wave vector as a combination of the cell wave
// vectors W_1..W_d and returns the coefficient array (length modDim), or
// null when no match is found.
//
//   p - wave vector components [x, y, z]
//
// Three strategies are tried in order:
//   1. a single cell vector: the projection of p onto q_i, divided by
//      |q_i|^2, is (approximately) a nonzero integer;
//   2. a grid search over coefficients -3..3 for each cell vector, first as
//      integer multiples and then as reciprocals, accepting a squared
//      distance below 0.0001;
//   3. a component-wise ratio test against each single cell vector.
Clazz.defineMethod(c$, "calculateQCoefs", function (p) {
  var dim = this.modDim;

  // Lazily cache the cell wave vectors as points.
  if (this.qs == null) {
    this.qs = new Array(dim);
    for (var iq = 0; iq < dim; iq++) {
      this.qs[iq] = this.toP3(this.getMod("W_" + (iq + 1)));
    }
  }
  var qs = this.qs;
  var pt = this.toP3(p);

  // --- 1. integer multiple of one cell vector (by projection) --------------
  for (var i1 = 0; i1 < dim; i1++) {
    if (qs[i1] == null) continue;
    var mult = this.approxInt(pt.dot(qs[i1]) / qs[i1].dot(qs[i1]));
    if (mult != 0) {
      p = Clazz.newDoubleArray(dim, 0);
      p[i1] = mult;
      return p;
    }
  }

  // --- 2. grid search -----------------------------------------------------
  var target = this.toP3(p);
  var jmin = (dim < 2 ? 0 : -3);
  var jmax = (dim < 2 ? 0 : 3);
  var kmin = (dim < 3 ? 0 : -3);
  var kmax = (dim < 3 ? 0 : 3);

  // Sets pt = ci*q1 (+ cj*q2) (+ ck*q3) and reports whether it hits target.
  var matches = function (ci, cj, ck) {
    pt.setT(qs[0]);
    pt.scale(ci);
    if (dim > 1 && qs[1] != null) pt.scaleAdd2(cj, qs[1], pt);
    if (dim > 2 && qs[2] != null) pt.scaleAdd2(ck, qs[2], pt);
    return pt.distanceSquared(target) < 0.0001;
  };
  // Packs the coefficients actually used for this dimension.
  var pack = function (ci, cj, ck) {
    var c = Clazz.newDoubleArray(dim, 0);
    switch (dim) {
    default:
      c[2] = ck;
      // fall through
    case 2:
      c[1] = cj;
      // fall through
    case 1:
      c[0] = ci;
      break;
    }
    return c;
  };

  for (var i = -3; i <= 3; i++) {
    for (var j = jmin; j <= jmax; j++) {
      for (var k = kmin; k <= kmax; k++) {
        if (matches(i, j, k)) return pack(i, j, k);
        // Reciprocal coefficients; a zero index gives an infinite factor,
        // which cannot satisfy the distance test.
        if (matches(1 / i, 1 / j, 1 / k)) return pack(1 / i, 1 / j, 1 / k);
      }
    }
  }

  // --- 3. component-wise ratio against one cell vector ---------------------
  pt = this.toP3(p);
  for (var i3 = 0; i3 < dim; i3++) {
    if (qs[i3] == null) continue;
    var q = qs[i3];
    var ifn = 0;
    if (pt.x != 0) ifn = this.approxInt(pt.x / q.x);
    if (pt.y != 0) ifn = Math.max(this.approxInt(pt.y / q.y), ifn);
    if (ifn == 0 && pt.z != 0) ifn = Math.max(this.approxInt(pt.z / q.z), ifn);
    if (ifn == 0) continue;
    // Reject when any nonzero component of q does not reproduce pt at this
    // multiple (approxInt returns 0 for a non-integer argument; the +10
    // offset keeps an exact match away from 0).
    if (q.x != 0 && this.approxInt(10 + q.x * ifn - pt.x) == 0 || q.y != 0 && this.approxInt(10 + q.y * ifn - pt.y) == 0 || q.z != 0 && this.approxInt(10 + q.z * ifn - pt.z) == 0) continue;
    p = Clazz.newDoubleArray(dim, 0);
    p[i3] = ifn;
    return p;
  }
  return null;
}, "~A");
|
| 375 |
+
// Returns the nearest integer when fn lies within 0.001 of it, otherwise 0.
// (0 therefore also stands for "not an integer".)
Clazz.defineMethod(c$, "approxInt", function (fn) {
  var nearest = Math.round(fn);
  if (Math.abs(fn - nearest) < 0.001) return nearest;
  return 0;
}, "~N");
|
| 380 |
+
// Builds a JU.P3 point from the first three elements of an array.
Clazz.defineMethod(c$, "toP3", function (x) {
  var px = x[0];
  var py = x[1];
  var pz = x[2];
  return JU.P3.new3(px, py, pz);
}, "~A");
|
| 384 |
+
// Appends a new JU.Modulation to the list kept for atomName in
// this.htAtomMods, creating the list on first use.
//
// Counters: this.ac counts atoms that have a list, this.modCount counts
// modulations added.
Clazz.defineMethod(c$, "addAtomModulation", function (atomName, axis, type, params, utens, qcoefs) {
  var mods = this.htAtomMods.get(atomName);
  if (mods == null) {
    this.ac++;
    mods = new JU.Lst();
    this.htAtomMods.put(atomName, mods);
  }
  var modulation = new JU.Modulation(axis, type, params, utens, qcoefs);
  mods.addLast(modulation);
  this.modCount++;
}, "~S,~S,~S,~A,~S,~A");
|
| 393 |
+
// Creates a Subsystem for the given code and W matrix, records it in the
// load notes and registers it via setSubsystem(). A null code is ignored.
Clazz.overrideMethod(c$, "addSubsystem", function (code, w) {
  if (code != null) {
    var subsystem = new J.adapter.readers.cif.Subsystem(this, code, w);
    this.cr.appendLoadNote("subsystem " + code + "\n" + w);
    this.setSubsystem(code, subsystem);
  }
}, "~S,JU.Matrix");
|
| 400 |
+
// Adds val to one element of the atom's anisoBorU array.
//
//   id - tensor element name; its offset in "U11U22U33U12U13U23UISO" divided
//        by 3 gives the array index (U11 -> 0 ... U23 -> 5, UISO -> 6)
//
// NOTE(review): an id not found in the list gives offset -1; presumably
// Clazz.doubleToInt truncates -1/3 to 0, which would target U11 - confirm.
Clazz.defineMethod(c$, "addUStr", function (atom, id, val) {
  var offset = "U11U22U33U12U13U23UISO".indexOf(id);
  var index = Clazz.doubleToInt(offset / 3);
  if (JU.Logger.debuggingHigh) {
    JU.Logger.debug("MOD RDR adding " + id + " " + index + " " + val + " to " + atom.anisoBorU[index]);
  }
  this.cr.asc.setU(atom, index, val + atom.anisoBorU[index]);
}, "J.adapter.smarter.Atom,~S,~N");
|
| 406 |
+
// Computes and attaches the modulation set for one atom.
//
//   a - the atom; on success a.vib is replaced by a JU.ModulationSet, and
//       a.foccupancy, a.bfactor, a.anisoBorU and the atom's tensors may be
//       updated
//
// The atom is left untouched when it has no modulation list (and no
// subsystem applies), when this.symmetry is null, or when a.bsSymmetry is
// null.
//
// Fix over the original: the "Uij(final)" debug line was written after
// a.anisoBorU had been set to null and so always logged null; the values are
// now captured before the field is cleared.
Clazz.defineMethod(c$, "modulateAtom", function (a) {
  if (this.modCoord && this.htSubsystems != null) {
    // NOTE(review): ptc is not used afterwards. The calls are kept because
    // getSymmetry() can lazily finalize a subsystem - confirm before removing.
    var ptc = JU.P3.newP(a);
    var spt = this.getSymmetry(a);
    spt.toCartesian(ptc, true);
  }

  var list = this.htAtomMods.get(a.atomName);
  if (list == null && a.altLoc != '\0' && this.htSubsystems != null) {
    // Atom of a subsystem without modulations of its own: use an empty list
    // so that it still gets a ModulationSet.
    list = new JU.Lst();
  }
  if (list == null || this.symmetry == null || a.bsSymmetry == null) return;

  // Symmetry operation that generated this atom: the first set bit, or with
  // MODLAST the last one (modulo the operation count) when that is larger.
  var iop = Math.max(a.bsSymmetry.nextSetBit(0), 0);
  if (this.modLast) iop = Math.max((a.bsSymmetry.length() - 1) % this.nOps, iop);
  if (JU.Logger.debuggingHigh) JU.Logger.debug("\nsetModulation: i=" + a.index + " " + a.atomName + " xyz=" + a + " occ=" + a.foccupancy);

  // The rotation part is recomputed only when the operation changes between
  // consecutive atoms.
  if (iop != this.iopLast) {
    this.iopLast = iop;
    this.gammaE = new JU.M3();
    this.getSymmetry(a).getSpaceGroupOperation(iop).getRotationScale(this.gammaE);
  }
  if (JU.Logger.debugging) {
    JU.Logger.debug("setModulation iop = " + iop + " " + this.symmetry.getSpaceGroupXyz(iop, false) + " " + a.bsSymmetry);
  }

  var ms = new JU.ModulationSet().setMod(a.index + " " + a.atomName, this.getAtomR0(this.cr.asc.atoms[a.atomSite]), this.getAtomR0(a), this.modDim, list, this.gammaE, this.getMatrices(a), this.getSymmetry(a), this.nOps, iop, Clazz.instanceOf(a.vib, JU.Vibration) ? a.vib : null, this.isCommensurate);
  ms.calculate(this.modTUV, false);

  // Occupancy modulation.
  if (!Float.isNaN(ms.vOcc)) {
    a.foccupancy = ms.setOccupancy(this.getMod("J_O#0;" + a.atomName), a.foccupancy, (a.vib == null ? 0 : a.vib.x));
  }

  // Uij (anisotropic displacement) modulation.
  if (ms.htUij != null) {
    var t = (a.tensors == null ? null : a.tensors.get(0));
    if (t != null && t.parBorU != null) {
      // Start from the unmodulated tensor's parameters.
      a.anisoBorU = Clazz.newFloatArray(8, 0);
      for (var i = 0; i < 8; i++) a.anisoBorU[i] = t.parBorU[i];
      t.isUnmodulated = true;
    }
    if (a.anisoBorU == null) {
      JU.Logger.error("MOD RDR cannot modulate nonexistent atom anisoBorU for atom " + a.atomName);
    } else {
      if (JU.Logger.debuggingHigh) {
        JU.Logger.debug("setModulation Uij(initial)=" + JU.Escape.eAF(a.anisoBorU));
        JU.Logger.debug("setModulation tensor=" + JU.Escape.e((a.tensors.get(0)).getInfo("all")));
      }
      var it = ms.htUij.entrySet().iterator();
      while (it.hasNext()) {
        var e = it.next();
        this.addUStr(a, e.getKey(), e.getValue().floatValue());
      }
      var sym = this.getAtomSymmetry(a, this.symmetry);
      t = this.cr.asc.getXSymmetry().addRotatedTensor(a, sym.getTensor(this.cr.vwr, a.anisoBorU), iop, false, sym);
      t.isModulated = true;
      t.id = JU.Escape.e(a.anisoBorU);
      a.bfactor = a.anisoBorU[7] * 100;
      // Keep the final values for the debug output, then clear the field.
      var finalU = a.anisoBorU;
      a.anisoBorU = null;
      if (JU.Logger.debuggingHigh) {
        JU.Logger.debug("setModulation Uij(final)=" + JU.Escape.eAF(finalU) + "\n");
        JU.Logger.debug("setModulation tensor=" + JU.Escape.e((a.tensors.get(1)).getInfo("all")));
      }
    }
  }

  // A set whose x is NaN is reset to a zero displacement.
  if (Float.isNaN(ms.x)) ms.set(0, 0, 0);
  a.vib = ms;
}, "J.adapter.smarter.Atom");
|
| 456 |
+
// Returns a copy of the atom's position. When a separate supercell symmetry
// is in use, the copy is converted to Cartesian with it and back to
// fractional with this.symmetry.
Clazz.defineMethod(c$, "getAtomR0", function (atom) {
  var r0 = JU.P3.newP(atom);
  var supercell = this.supercellSymmetry;
  if (supercell == null) return r0;
  supercell.toCartesian(r0, true);
  this.symmetry.toFractional(r0, true);
  return r0;
}, "J.adapter.smarter.Atom");
|
| 464 |
+
// Returns the symmetry of the subsystem the atom belongs to, or
// defaultSymmetry when there are no subsystems or none matches the atom.
Clazz.overrideMethod(c$, "getAtomSymmetry", function (a, defaultSymmetry) {
  if (this.htSubsystems == null) return defaultSymmetry;
  var subsystem = this.getSubsystem(a);
  if (subsystem == null) return defaultSymmetry;
  return subsystem.getSymmetry();
}, "J.adapter.smarter.Atom,J.api.SymmetryInterface");
|
| 469 |
+
// Registers a subsystem under its code (creating the table on first use) and
// re-applies the subsystem-related reader options.
Clazz.defineMethod(c$, "setSubsystem", function (code, system) {
  var table = this.htSubsystems;
  if (table == null) {
    table = new java.util.Hashtable();
    this.htSubsystems = table;
  }
  table.put(code, system);
  this.setSubsystemOptions();
}, "~S,J.adapter.readers.cif.Subsystem");
|
| 475 |
+
// Returns the modulation matrices for an atom: those of its subsystem when
// it has one, otherwise the reader-level this.modMatrices.
Clazz.defineMethod(c$, "getMatrices", function (a) {
  var subsystem = this.getSubsystem(a);
  if (subsystem == null) return this.modMatrices;
  return subsystem.getModMatrices();
}, "J.adapter.smarter.Atom");
|
| 480 |
+
// Returns the symmetry to use for an atom: that of its subsystem when it has
// one, otherwise this.symmetry.
Clazz.defineMethod(c$, "getSymmetry", function (a) {
  var subsystem = this.getSubsystem(a);
  if (subsystem == null) return this.symmetry;
  return subsystem.getSymmetry();
}, "J.adapter.smarter.Atom");
|
| 485 |
+
// Looks up the subsystem keyed by the atom's altLoc (as a string); null when
// no subsystems are defined.
Clazz.defineMethod(c$, "getSubsystem", function (a) {
  if (this.htSubsystems == null) return null;
  var code = "" + a.altLoc;
  return this.htSubsystems.get(code);
}, "J.adapter.smarter.Atom");
|
| 489 |
+
// Remembers the requested cell range and widens minXYZ/maxXYZ (in place) so
// that, for every subsystem, the eight corners of the requested range are
// covered when expressed in that subsystem's fractional coordinates.
//
// No-op without subsystems. The original range is kept in
// this.minXYZ0 / this.maxXYZ0 for later trimming.
Clazz.overrideMethod(c$, "setMinMax0", function (minXYZ, maxXYZ) {
  if (this.htSubsystems == null) return;

  var cell = this.getDefaultUnitCell();
  this.minXYZ0 = JU.P3.newP(minXYZ);
  this.maxXYZ0 = JU.P3.newP(maxXYZ);

  // Range corners in Cartesian coordinates of the default cell.
  var lo = JU.P3.newP(minXYZ);
  var hi = JU.P3.newP(maxXYZ);
  var corner = new JU.P3();
  cell.toCartesian(lo, true);
  cell.toCartesian(hi, true);
  var cube = JU.BoxInfo.unitCubePoints;

  if (this.sigma == null) {
    JU.Logger.error("Why are we in MSRdr.setMinMax0 without modulation init?");
    return;
  }

  var it = this.htSubsystems.entrySet().iterator();
  while (it.hasNext()) {
    var entry = it.next();
    var sym = entry.getValue().getSymmetry();
    for (var i = 8; --i >= 0; ) {
      // Unit-cube vertex i selects min or max per axis.
      corner.x = (cube[i].x == 0 ? lo.x : hi.x);
      corner.y = (cube[i].y == 0 ? lo.y : hi.y);
      corner.z = (cube[i].z == 0 ? lo.z : hi.z);
      this.expandMinMax(corner, sym, minXYZ, maxXYZ);
    }
  }
}, "JU.P3,JU.P3");
|
| 514 |
+
// Widens minXYZ/maxXYZ in place so that they enclose pt once pt has been
// converted to fractional coordinates of sym. A bound is only moved when pt
// lies outside it by more than a small tolerance; it is then set one whole
// cell beyond the floor/ceiling of the coordinate.
Clazz.defineMethod(c$, "expandMinMax", function (pt, sym, minXYZ, maxXYZ) {
  var frac = JU.P3.newP(pt);
  var slop = 0.0001;
  sym.toFractional(frac, false);

  // lower bounds
  if (minXYZ.x > frac.x + slop) {
    minXYZ.x = Clazz.doubleToInt(Math.floor(frac.x)) - 1;
  }
  if (minXYZ.y > frac.y + slop) {
    minXYZ.y = Clazz.doubleToInt(Math.floor(frac.y)) - 1;
  }
  if (minXYZ.z > frac.z + slop) {
    minXYZ.z = Clazz.doubleToInt(Math.floor(frac.z)) - 1;
  }

  // upper bounds
  if (maxXYZ.x < frac.x - slop) {
    maxXYZ.x = Clazz.doubleToInt(Math.ceil(frac.x)) + 1;
  }
  if (maxXYZ.y < frac.y - slop) {
    maxXYZ.y = Clazz.doubleToInt(Math.ceil(frac.y)) + 1;
  }
  if (maxXYZ.z < frac.z - slop) {
    maxXYZ.z = Clazz.doubleToInt(Math.ceil(frac.z)) + 1;
  }
}, "JU.P3,J.api.SymmetryInterface,JU.P3,JU.P3");
|
| 526 |
+
// Removes from the collection's atom bitset (asc.bsAtoms) every atom that
// should not be shown: for commensurate structures (unless MODAVE is set)
// atoms with occupancy below 0.5, and in all cases atoms whose displaced
// position falls outside the originally requested cell range
// (minXYZ0..maxXYZ0, tolerance 0.001).
//
// No-op when the reader is not applying symmetry.
Clazz.defineMethod(c$, "trimAtomSet", function () {
  if (!this.cr.doApplySymmetry) return;

  var asc = this.cr.asc;
  var bs = asc.bsAtoms;
  var cell = this.getDefaultUnitCell();
  var atoms = asc.atoms;
  var pos = new JU.P3();
  if (bs == null) bs = asc.bsAtoms = JU.BSUtil.newBitSet2(0, asc.ac);

  for (var i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
    var atom = atoms[i];
    var keep = (!this.isCommensurate || this.modAverage || atom.foccupancy >= 0.5);
    if (keep) {
      // Displaced position: via the atom's own symmetry to Cartesian, then
      // into fractional coordinates of the default cell.
      pos.setT(atom);
      if (atom.vib != null) pos.add(atom.vib);
      this.getSymmetry(atom).toCartesian(pos, false);
      cell.toFractional(pos, false);
      if (this.cr.fixJavaFloat) JU.PT.fixPtFloats(pos, 100000.0);
      keep = asc.xtalSymmetry.isWithinCell(3, pos, this.minXYZ0.x, this.maxXYZ0.x, this.minXYZ0.y, this.maxXYZ0.y, this.minXYZ0.z, this.maxXYZ0.z, 0.001);
    }
    if (!keep) {
      bs.clear(i);
      continue;
    }
    if (this.cr.fixJavaFloat) JU.PT.fixPtFloats(atom, 100000.0);
  }
});
|
| 551 |
+
// Returns the symmetry of the subsystem named by the MODCELL option when
// such a subsystem exists, otherwise the atom-set collection's symmetry.
// Callers in this file only reach this while this.htSubsystems is non-null.
Clazz.defineMethod(c$, "getDefaultUnitCell", function () {
  var code = this.modCell;
  if (code != null && this.htSubsystems.containsKey(code)) {
    return this.htSubsystems.get(code).getSymmetry();
  }
  return this.cr.asc.getSymmetry();
});
|
| 555 |
+
// Returns the symmetry of the subsystem registered under code. The
// subsystem table and the entry are expected to exist; neither is checked.
Clazz.overrideMethod(c$, "getSymmetryFromCode", function (code) {
  var subsystem = this.htSubsystems.get(code);
  return subsystem.getSymmetry();
}, "~S");
|
| 559 |
+
// Interprets one lattice-centering specification and appends the resulting
// vector(s) to lattvecs.
//
//   lattvecs - list receiving float arrays
//   data     - specification; only its first character selects the case:
//                P, X        nothing to add
//                A, B, C, I  (0.5, 0.5, 0.5) with the component of the named
//                            axis zeroed (none zeroed for I)
//                F           same as A, B and C together
//                M, 0        explicit numbers parsed from data when it
//                            contains "."; M reads one more value than 0
//
// Returns false for any other first character, true otherwise.
Clazz.overrideMethod(c$, "addLatticeVector", function (lattvecs, data) {
  var vec = null;
  var kind = data.charAt(0);
  var nValues = this.modDim + 3;
  switch (kind) {
  case 'P':
  case 'X':
    break;
  case 'A':
  case 'B':
  case 'C':
  case 'I':
    vec = Clazz.newFloatArray(-1, [0.5, 0.5, 0.5]);
    // 'A' (65) -> index 0, 'B' -> 1, 'C' -> 2
    if (kind != 'I') vec[kind.charCodeAt(0) - 65] = 0;
    break;
  case 'F':
    this.addLatticeVector(lattvecs, "A");
    this.addLatticeVector(lattvecs, "B");
    this.addLatticeVector(lattvecs, "C");
    break;
  case 'M':
    nValues++;
    // fall through
  case '0':
    if (data.indexOf(".") >= 0) {
      vec = J.adapter.smarter.AtomSetCollectionReader.getTokensFloat(data, null, nValues);
    }
    break;
  default:
    return false;
  }
  if (vec != null) lattvecs.addLast(vec);
  return true;
}, "JU.Lst,~S");
|
| 591 |
+
// Static constant: names of the U tensor elements, three characters each
// (the trailing "UISO" starts at offset 18).
Clazz.defineStatics(c$, "U_LIST", "U11U22U33U12U13U23UISO");
|
| 593 |
+
});
|
static/j2s/J/adapter/readers/cif/Subsystem.js
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage("J.adapter.readers.cif");
Clazz.load(null, "J.adapter.readers.cif.Subsystem", ["JU.Lst", "$.Matrix", "$.V3", "JU.Logger", "$.SimpleUnitCell"], function () {

  // One subsystem of a composite modulated structure, defined by a code and
  // a (3+d)x(3+d) matrix W. Its symmetry object and modulation matrices are
  // derived lazily from W and the owning MSRdr.
  c$ = Clazz.decorateAsClass(function () {
    this.msRdr = null;        // owning MSRdr
    this.code = null;         // subsystem code
    this.d = 0;               // rows of w minus 3
    this.w = null;            // W matrix (JU.Matrix)
    this.symmetry = null;     // derived unit cell / space group
    this.modMatrices = null;  // [sigma_nu, tFactor]
    this.isFinalized = false; // set once operators have been created
    Clazz.instantialize(this, arguments);
  }, J.adapter.readers.cif, "Subsystem");

  // Stores the owner, code and W matrix; d is taken from the row count of W.
  Clazz.makeConstructor(c$, function (msRdr, code, w) {
    this.msRdr = msRdr;
    this.code = code;
    this.w = w;
    this.d = w.getArray().length - 3;
  }, "J.adapter.readers.cif.MSRdr,~S,JU.Matrix");

  // Returns the subsystem symmetry, deriving it on first use.
  Clazz.defineMethod(c$, "getSymmetry", function () {
    if (!this.isFinalized) {
      this.setSymmetry(true);
    }
    return this.symmetry;
  });

  // Returns [sigma_nu, tFactor], deriving them on first use.
  Clazz.defineMethod(c$, "getModMatrices", function () {
    if (!this.isFinalized) {
      this.setSymmetry(true);
    }
    return this.modMatrices;
  });

  // Derives this subsystem's unit cell, modulation matrices and (when
  // setOperators is true) its symmetry operators from W and the parent
  // structure's symmetry.
  //
  //   setOperators - false: stop after the unit cell and matrices are set and
  //                  leave the subsystem unfinalized
  Clazz.defineMethod(c$, "setSymmetry", function (setOperators) {
    var rows;
    JU.Logger.info("[subsystem " + this.code + "]");
    var winv = this.w.inverse();
    JU.Logger.info("w=" + this.w);
    JU.Logger.info("w_inv=" + winv);

    // Blocks of W: [ w33 w3d ; wd3 wdd ]
    var w33 = this.w.getSubmatrix(0, 0, 3, 3);
    var wd3 = this.w.getSubmatrix(3, 0, this.d, 3);
    var w3d = this.w.getSubmatrix(0, 3, 3, this.d);
    var wdd = this.w.getSubmatrix(3, 3, this.d, this.d);

    // sigma_nu = (wdd*sigma + wd3) * (w3d*sigma + w33)^-1
    // tFactor  = wdd - sigma_nu * w3d
    var sigma = this.msRdr.getSigma();
    var sigma_nu = wdd.mul(sigma).add(wd3).mul(w3d.mul(sigma).add(w33).inverse());
    var tFactor = wdd.sub(sigma_nu.mul(w3d));
    JU.Logger.info("sigma_nu = " + sigma_nu);

    // Reciprocal vectors of the parent cell form the first three rows; the
    // same row arrays are shared between the two matrices.
    var s0 = this.msRdr.cr.asc.getSymmetry();
    var vu43 = s0.getUnitCellVectors();
    var vr43 = JU.SimpleUnitCell.getReciprocal(vu43, null, 1);
    var mard3 = new JU.Matrix(null, 3 + this.d, 3);
    var mar3 = new JU.Matrix(null, 3, 3);
    var mard3a = mard3.getArray();
    var mar3a = mar3.getArray();
    for (var i = 0; i < 3; i++) {
      mard3a[i] = mar3a[i] = Clazz.newDoubleArray(-1, [vr43[i + 1].x, vr43[i + 1].y, vr43[i + 1].z]);
    }

    // The remaining d rows are sigma times the reciprocal basis.
    var sx = sigma.mul(mar3);
    rows = sx.getArray();
    for (var k = 0; k < this.d; k++) {
      mard3a[k + 3] = rows[k];
    }

    // W applied to that basis gives the subsystem's reciprocal vectors; a
    // second getReciprocal() call turns them into its unit cell.
    rows = this.w.mul(mard3).getArray();
    var uc_nu = new Array(4);
    uc_nu[0] = vu43[0];
    for (var m = 0; m < 3; m++) {
      uc_nu[m + 1] = JU.V3.new3(rows[m][0], rows[m][1], rows[m][2]);
    }
    uc_nu = JU.SimpleUnitCell.getReciprocal(uc_nu, null, 1);
    this.symmetry = (this.msRdr.cr.getInterface("JS.Symmetry")).getUnitCell(uc_nu, false, null);
    this.modMatrices = Clazz.newArray(-1, [sigma_nu, tFactor]);
    if (!setOperators) return;

    this.isFinalized = true;
    JU.Logger.info("unit cell parameters: " + this.symmetry.getUnitCellInfo());
    this.symmetry.createSpaceGroup(-1, "[subsystem " + this.code + "]", new JU.Lst(), this.d);

    // Transform each parent operation into this subsystem: R' = W R W^-1,
    // v' = W v. When R' is "mixed" (see isMixed), the first other subsystem
    // whose W gives an unmixed rotation supplies the operation instead.
    var nOps = s0.getSpaceGroupOperationCount();
    for (var iop = 0; iop < nOps; iop++) {
      var rv = s0.getOperationRsVs(iop);
      var r0 = rv.getRotation();
      var v0 = rv.getTranslation();
      var r = this.w.mul(r0).mul(winv);
      var v = this.w.mul(v0);
      var code = this.code;
      if (this.isMixed(r)) {
        var it = this.msRdr.htSubsystems.entrySet().iterator();
        while (it.hasNext()) {
          var other = it.next().getValue();
          if (other === this) continue;
          var rj = other.w.mul(r0).mul(winv);
          if (!this.isMixed(rj)) {
            r = rj;
            v = other.w.mul(v0);
            code = other.code;
            break;
          }
        }
      }
      var jf = this.symmetry.addOp(code, r, v, sigma_nu);
      JU.Logger.info(this.code + "." + (iop + 1) + (this.code.equals(code) ? " " : ">" + code + " ") + jf);
    }
  }, "~B");

  // True when the upper-right 3 x d block of r has any nonzero element.
  Clazz.defineMethod(c$, "isMixed", function (r) {
    var a = r.getArray();
    var lastCol = 3 + this.d;
    for (var i = 3; --i >= 0; ) {
      for (var j = lastCol; --j >= 3; ) {
        if (a[i][j] != 0) return true;
      }
    }
    return false;
  }, "JU.Matrix");

  // "Subsystem <code>" followed by the W matrix on the next line.
  Clazz.overrideMethod(c$, "toString", function () {
    var header = "Subsystem " + this.code;
    return header + "\n" + this.w;
  });
});
|
static/j2s/J/adapter/readers/molxyz/Mol3DReader.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage("J.adapter.readers.molxyz");
Clazz.load(["J.adapter.readers.molxyz.MolReader"], "J.adapter.readers.molxyz.Mol3DReader", null, function () {

  // MolReader subclass whose only customization is to clear the allow2D
  // flag when the reader is initialized.
  c$ = Clazz.declareType(J.adapter.readers.molxyz, "Mol3DReader", J.adapter.readers.molxyz.MolReader);

  Clazz.overrideMethod(c$, "initializeReader", function () {
    this.allow2D = false;
  });
});
|
static/j2s/J/adapter/readers/molxyz/MolReader.js
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.molxyz");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.AtomSetCollectionReader"], "J.adapter.readers.molxyz.MolReader", ["java.lang.Exception", "java.util.Hashtable", "JU.PT", "J.adapter.smarter.Atom", "J.api.JmolAdapter", "JU.Logger"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
  // True while 2D post-processing (set2D in finalizeReaderMR) is still wanted;
  // addMolAtom clears it as soon as an atom with z != 0 is seen.
  this.optimize2D = false;
  // Set by addMolAtom once an explicit atom serial is supplied; bonds are then
  // added by atom name instead of by index.
  this.haveAtomSerials = false;
  // Two-character dimension code taken from the second header line ("3D" if the line is short).
  this.dimension = null;
  // When false, a "2D" header makes processMolSdHeader throw.
  this.allow2D = true;
  // Value of asc.ac when the current model started; used as the bond index offset.
  this.iatom0 = 0;
  // V3000 helper for the current model, or null.
  this.vr = null;
  // Atom count passed to readAtomsAndBonds.
  this.atomCount = 0;
  // Per-atom strings collected by readAtomValues, or null.
  this.atomData = null;
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.molxyz, "MolReader", J.adapter.smarter.AtomSetCollectionReader);
|
| 14 |
+
Clazz.overrideMethod (c$, "initializeReader",
function () {
  // The "2D" filter key enables the 2D handling performed in finalizeReaderMR.
  var wants2D = this.checkFilterKey ("2D");
  this.optimize2D = wants2D;
});
|
| 18 |
+
Clazz.overrideMethod (c$, "checkLine",
function () {
  // Handles the current line as the start of one model record.
  // Returns false (and clears this.continuing) when reading should stop.
  var fromRGFile = this.line.startsWith ("$MDL");
  if (!fromRGFile) {
    // A bare terminator line carries no model of its own.
    if (this.line.equals ("M END")) return true;
  } else {
    this.discardLinesUntilStartsWith ("$HDR");
    this.rd ();
    if (this.line == null) {
      JU.Logger.warn ("$HDR not found in MDL RG file");
      this.continuing = false;
      return false;
    }
  }
  var wanted = this.doGetModel (++this.modelNumber, null);
  if (wanted) {
    // Remember where this model's atoms start so bond indices can be offset.
    this.iatom0 = this.asc.ac;
    this.processMolSdHeader ();
    this.processCtab (fromRGFile);
    this.vr = null;
    if (this.isLastModel (this.modelNumber)) {
      this.continuing = false;
      return false;
    }
  }
  // Skip whatever remains of this record, up to its "$$$$" delimiter.
  if (this.line != null && this.line.indexOf ("$$$$") < 0) {
    this.discardLinesUntilStartsWith ("$$$$");
  }
  return true;
});
|
| 41 |
+
Clazz.overrideMethod (c$, "finalizeSubclassReader",
function () {
  // All finalization work lives in finalizeReaderMR.
  this.finalizeReaderMR ();
});
|
| 45 |
+
Clazz.defineMethod (c$, "finalizeReaderMR",
function () {
  // Apply the 2D step only if it is still requested, then run the
  // base-class finalization with the trajectory flag cleared.
  if (this.optimize2D) {
    this.set2D ();
  }
  this.isTrajectory = false;
  this.finalizeReaderASCR ();
});
|
| 51 |
+
Clazz.defineMethod (c$, "processMolSdHeader",
function () {
  // Reads the three header lines of a MOL/SD record, starting with the
  // current line, and starts a new atom set named after the first one.
  // Returns early, without creating the atom set, if the file ends first.

  // Line 1: title.
  var title = this.line.trim ();
  this.asc.setCollectionName (title);
  var headerText = "" + this.line + "\n";

  // Line 2: characters 20-22 hold the dimension code; short lines default to "3D".
  this.rd ();
  if (this.line == null) return;
  headerText += this.line + "\n";
  if (this.line.length < 22) {
    this.dimension = "3D";
  } else {
    this.dimension = this.line.substring (20, 22);
  }
  if (this.dimension.equals ("2D")) {
    if (!this.allow2D) throw new Exception ("File is 2D, not 3D");
    this.appendLoadNote ("This model is 2D. Its 3D structure has not been generated.");
  }
  this.asc.setInfo ("dimension", this.dimension);

  // Line 3: comment; stored trimmed and checked for an embedded script.
  this.rd ();
  if (this.line == null) return;
  this.line = this.line.trim ();
  headerText += this.line + "\n";
  JU.Logger.info (headerText);
  this.checkCurrentLineForScript ();
  this.asc.setInfo ("fileHeader", headerText);
  this.newAtomSet (title);
});
|
| 74 |
+
Clazz.defineMethod (c$, "processCtab",
function (isMDL) {
  // Reads the counts line and hands the connection table to either the
  // fixed-column reader in this class or the V3000 helper.
  if (isMDL) this.discardLinesUntilStartsWith ("$CTAB");
  if (this.rd () == null) return;
  var isV3000 = (this.line.indexOf ("V3000") >= 0);
  if (!isV3000) {
    // Fixed columns: atom count in 0-3, bond count in 3-6.
    var nAtoms = this.parseIntRange (this.line, 0, 3);
    var nBonds = this.parseIntRange (this.line, 3, 6);
    this.readAtomsAndBonds (nAtoms, nBonds);
  } else {
    // For V3000 the 2D flag follows the header's dimension code.
    this.optimize2D = this.dimension.equals ("2D");
    var helper = this.getInterface ("J.adapter.readers.molxyz.V3000Rdr");
    this.vr = helper.set (this);
    this.discardLinesUntilContains ("COUNTS");
    this.vr.readAtomsAndBonds (this.getTokens ());
  }
  this.applySymmetryAndSetTrajectory ();
}, "~B");
|
| 87 |
+
Clazz.defineMethod (c$, "readAtomsAndBonds",
function (ac, bc) {
  // Reads a fixed-column connection table: ac atom lines, optional
  // "V "-prefixed atom-value lines, bc bond lines, then the property and
  // data items up to the "$$$$" delimiter. Collected data items are stored
  // as the model's "molData" info.
  //   ac - number of atom lines to read
  //   bc - number of bond lines to read
  this.atomCount = ac;
  // Atom values belong to one connection table only. Without this reset a
  // later model with no "V " lines would publish the previous model's values
  // through the atomData check at the end of this method.
  this.atomData = null;
  for (var i = 0; i < ac; ++i) {
    if (this.rd () == null) {
      // Truncated file: keep what was read instead of dereferencing null.
      JU.Logger.warn ("MOL atom block ended early: expected " + ac + " atoms, read " + i);
      return;
    }
    var len = this.line.length;
    var elementSymbol;
    var charge = 0;
    var isotope = 0;
    // -2147483648 (Integer.MIN_VALUE) marks "no explicit atom serial".
    var iAtom = -2147483648;
    var x = this.parseFloatRange (this.line, 0, 10);
    var y = this.parseFloatRange (this.line, 10, 20);
    var z = this.parseFloatRange (this.line, 20, 30);
    if (len < 34) {
      // Short line: nothing after the symbol is interpreted.
      elementSymbol = this.line.substring (31).trim ();
    } else {
      elementSymbol = this.line.substring (31, 34).trim ();
      if (elementSymbol.equals ("H1")) {
        elementSymbol = "H";
        isotope = 1;
      }
      if (len >= 39) {
        // Columns 36-39: charge code 1..7 maps to formal charge 3..-3.
        var code = this.parseIntRange (this.line, 36, 39);
        if (code >= 1 && code <= 7) charge = 4 - code;
        // Columns 34-36: mass difference relative to the natural isotope.
        code = this.parseIntRange (this.line, 34, 36);
        if (code != 0 && code >= -3 && code <= 4) {
          isotope = J.api.JmolAdapter.getNaturalIsotope (J.api.JmolAdapter.getElementNumber (elementSymbol)) + code;
        }
        // An earlier model may have switched the reader to serial-based bonding.
        if (iAtom == -2147483648 && this.haveAtomSerials) iAtom = i + 1;
      }
    }
    this.addMolAtom (iAtom, isotope, elementSymbol, charge, x, y, z);
  }
  this.rd ();
  // A file may end right after the atom block; guard before testing the line.
  if (this.line != null && this.line.startsWith ("V ")) {
    this.readAtomValues ();
  }
  if (bc == 0) this.asc.setNoAutoBond ();
  for (var i = 0; i < bc; ++i) {
    // The first bond line is already current.
    if (i > 0) this.rd ();
    if (this.line == null) {
      JU.Logger.warn ("MOL bond block ended early: expected " + bc + " bonds, read " + i);
      break;
    }
    var stereo = 0;
    var iAtom1 = this.line.substring (0, 3).trim ();
    var iAtom2 = this.line.substring (3, 6).trim ();
    var order = this.parseIntRange (this.line, 6, 9);
    // Stereo flags are only consulted for single bonds while 2D handling is active.
    if (this.optimize2D && order == 1 && this.line.length >= 12) stereo = this.parseIntRange (this.line, 9, 12);
    order = this.fixOrder (order, stereo);
    if (this.haveAtomSerials) this.asc.addNewBondFromNames (iAtom1, iAtom2, order);
    else this.asc.addNewBondWithOrder (this.iatom0 + this.parseIntStr (iAtom1) - 1, this.iatom0 + this.parseIntStr (iAtom2) - 1, order);
  }
  var molData = new java.util.Hashtable ();
  // Do not read past an end of file that was already reached.
  if (this.line != null) this.rd ();
  while (this.line != null && this.line.indexOf ("$$$$") != 0) {
    // readMolData and readIsotopes leave the next unread line current.
    if (this.line.indexOf (">") == 0) {
      this.readMolData (molData);
      continue;
    }
    if (this.line.startsWith ("M ISO")) {
      this.readIsotopes ();
      continue;
    }
    this.rd ();
  }
  if (this.atomData != null) {
    // An "atom_value_name" data item, if present, names the per-atom values.
    var atomValueName = molData.get ("atom_value_name");
    molData.put (atomValueName == null ? "atom_values" : atomValueName.toString (), this.atomData);
  }
  if (!molData.isEmpty ()) this.asc.setCurrentModelInfo ("molData", molData);
}, "~N,~N");
|
| 152 |
+
Clazz.defineMethod (c$, "readAtomValues",
function () {
  // Collects the text of consecutive "V "-prefixed lines into this.atomData,
  // indexed by (atom number - 1). Entries with no line stay "".
  // Stops at the first line without the prefix, at end of file, or at an
  // atom number outside 1..atomCount (logged as an error).
  this.atomData = new Array (this.atomCount);
  for (var i = this.atomData.length; --i >= 0; ) this.atomData[i] = "";

  // rd() leaves this.line null at end of file, so test for that before
  // looking at the prefix.
  while (this.line != null && this.line.indexOf ("V ") == 0) {
    var iAtom = this.parseIntAt (this.line, 3);
    if (iAtom < 1 || iAtom > this.atomCount) {
      JU.Logger.error ("V nnn does not evalute to a valid atom number: " + iAtom);
      return;
    }
    var s = this.line.substring (6).trim ();
    this.atomData[iAtom - 1] = s;
    this.rd ();
  }
});
|
| 167 |
+
Clazz.defineMethod (c$, "readIsotopes",
function () {
  // Applies an isotope list line: a count at offset 6, then (atom number,
  // mass) pairs every 8 characters from offset 9. Each listed atom's symbol
  // becomes the mass followed by the symbol with any digits removed.
  // Always advances to the next line.
  var n = this.parseIntAt (this.line, 6);
  try {
    var i0 = this.asc.getLastAtomSetAtomIndex ();
    for (var i = 0, pt = 9; i < n; i++) {
      var ipt = this.parseIntAt (this.line, pt);
      var atom = this.asc.atoms[ipt + i0 - 1];
      var iso = this.parseIntAt (this.line, pt + 4);
      pt += 8;
      atom.elementSymbol = "" + iso + JU.PT.replaceAllCharacters (atom.elementSymbol, "0123456789", "");
    }
  } catch (e) {
    // Still best effort (entries applied so far are kept), but report the
    // bad line as readMolData does instead of hiding it.
    JU.Logger.error ("error reading isotope list: " + this.line);
  }
  this.rd ();
});
|
| 183 |
+
Clazz.defineMethod (c$, "readMolData",
function (molData) {
  // Reads one "> <name>" data item: the current line gives the name, and the
  // following lines up to a blank line, "$$$$" or end of file give the value.
  // The value is stored in molData under the lower-cased name. Items whose
  // name contains "_PARTIAL_CHARGES" or "ATOM_NAMES" are also applied to atoms.
  var atoms = this.asc.atoms;
  var dataName = JU.PT.trim (this.line, "> <").toLowerCase ();
  var data = "";
  while (this.rd () != null && !this.line.equals ("$$$$") && this.line.length > 0) {
    // An 81-character line ending in '+' continues on the next line with no line break.
    var isContinued = (this.line.length == 81 && this.line.charAt (80) == '+');
    data += (isContinued ? this.line.substring (0, 80) : this.line + "\n");
  }
  data = JU.PT.trim (data, "\n");
  JU.Logger.info (dataName + ":" + JU.PT.esc (data));
  molData.put (dataName, data);

  var upperName = dataName.toUpperCase ();
  var ndata = 0;
  if (upperName.contains ("_PARTIAL_CHARGES")) {
    // Layout: count, then (atom number, charge) pairs.
    try {
      var fdata = JU.PT.parseFloatArray (data);
      for (var i = this.asc.getLastAtomSetAtomIndex (), n = this.asc.ac; i < n; i++) atoms[i].partialCharge = 0;
      var pt = 0;
      var remaining = Clazz.floatToInt (fdata[pt++]);
      while (--remaining >= 0) {
        var atomIndex = Clazz.floatToInt (fdata[pt++]) + this.iatom0 - 1;
        var partialCharge = fdata[pt++];
        atoms[atomIndex].partialCharge = partialCharge;
        ndata++;
      }
    } catch (e) {
      // Any failure leaves the whole atom set with zero partial charges.
      for (var i = this.asc.getLastAtomSetAtomIndex (), n = this.asc.ac; i < n; i++) atoms[i].partialCharge = 0;
      JU.Logger.error ("error reading " + dataName + " field -- partial charges cleared");
    }
    JU.Logger.info (ndata + " partial charges read");
  } else if (upperName.contains ("ATOM_NAMES")) {
    // Layout: count, then (atom number, name) pairs; a name of "." keeps the existing name.
    ndata = 0;
    try {
      var tokens = JU.PT.getTokens (data);
      var pt = 0;
      var remaining = this.parseIntStr (tokens[pt++]);
      while (--remaining >= 0) {
        // Skip tokens that do not parse as an integer.
        var iatom = this.parseIntStr (tokens[pt++]);
        while (iatom == -2147483648) {
          iatom = this.parseIntStr (tokens[pt++]);
        }
        var atomIndex = iatom + this.iatom0 - 1;
        var name = tokens[pt++];
        if (!name.equals (".")) atoms[atomIndex].atomName = name;
        ndata++;
      }
    } catch (e) {
      JU.Logger.error ("error reading " + dataName + " field");
    }
    JU.Logger.info (ndata + " atom names read");
  }
}, "java.util.Map");
|
| 232 |
+
Clazz.defineMethod (c$, "addMolAtom",
function (iAtom, isotope, elementSymbol, charge, x, y, z) {
  // Creates one atom and adds it to the atom set collection.
  //   iAtom         - explicit atom serial, or -2147483648 for "none"
  //   isotope       - 0 for none; otherwise folded into the element symbol
  //   elementSymbol - symbol as read from the file
  //   charge        - formal charge
  //   x, y, z       - coordinates
  switch (isotope) {
  case 0:
    break;
  case 1:
    elementSymbol = "1H";
    break;
  case 2:
  case -1:
    // V3000Rdr.readAtoms passes hydrogen masses above 1 as (1 - mass), so
    // mass 2 arrives as -1. The default branch would turn it into "-1H".
    elementSymbol = "2H";
    break;
  case 3:
  case -2:
    // Same encoding as above: hydrogen mass 3 arrives as -2.
    elementSymbol = "3H";
    break;
  default:
    elementSymbol = isotope + elementSymbol;
  }
  // Any nonzero z coordinate means the structure is not flat, so 2D handling is dropped.
  if (this.optimize2D && z != 0) this.optimize2D = false;
  var atom = new J.adapter.smarter.Atom ();
  atom.elementSymbol = elementSymbol;
  atom.formalCharge = charge;
  this.setAtomCoordXYZ (atom, x, y, z);
  if (iAtom == -2147483648) {
    this.asc.addAtom (atom);
  } else {
    // From here on bonds are resolved by serial (see addMolBond).
    this.haveAtomSerials = true;
    atom.atomSerial = iAtom;
    this.asc.addAtomWithMappedSerialNumber (atom);
  }
}, "~N,~N,~S,~N,~N,~N,~N");
|
| 261 |
+
Clazz.defineMethod (c$, "fixOrder",
function (order, stereo) {
  // Maps a bond-type code from the file (plus the stereo flag for single
  // bonds) to the order value passed to the bond-adding calls.
  // NOTE(review): the returned numbers are presumably Jmol bond-order
  // constants; their definitions are not visible here.
  switch (order) {
  case 1:
    // Single bond: only stereo flags 1, 3 and 6 change the result.
    if (stereo === 1) return 1025;
    if (stereo === 3 || stereo === 6) return 1041;
    return order;
  case 2:
  case 3:
    return order;
  case 4:
    return 515;
  case 5:
    return 66;
  case 6:
    return 513;
  case 7:
    return 514;
  case 8:
  case 9:
    return 33;
  case 14:
    return 4;
  case 15:
    return 5;
  case 16:
    return 6;
  default:
    // Includes 0, -10 and every code not listed above.
    return 1;
  }
}, "~N,~N");
|
| 300 |
+
Clazz.defineMethod (c$, "addMolBond",
function (iAtom1, iAtom2, order, stereo) {
  // Adds one bond; iAtom1 and iAtom2 are atom numbers as text.
  var bondOrder = this.fixOrder (order, stereo);
  if (!this.haveAtomSerials) {
    // Without serials, atom numbers are 1-based positions within the current model.
    var index1 = this.iatom0 + this.parseIntStr (iAtom1) - 1;
    var index2 = this.iatom0 + this.parseIntStr (iAtom2) - 1;
    this.asc.addNewBondWithOrder (index1, index2, bondOrder);
    return;
  }
  this.asc.addNewBondFromNames (iAtom1, iAtom2, bondOrder);
}, "~S,~S,~N,~N");
|
| 306 |
+
});
|
static/j2s/J/adapter/readers/molxyz/V3000Rdr.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.molxyz");
|
| 2 |
+
Clazz.load (null, "J.adapter.readers.molxyz.V3000Rdr", ["java.util.Hashtable", "JU.PT"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
  // Owning reader, assigned by set(); all file access goes through it.
  this.mr = null;
  // This helper's copy of the line most recently returned by rd().
  this.line = null;
  // Per-field arrays of atom data gathered by readUserData, or null.
  this.userData = null;
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.molxyz, "V3000Rdr");
|
| 9 |
+
Clazz.makeConstructor (c$,
function () {
  // No-argument constructor; the instance is configured afterwards via set().
});
|
| 12 |
+
Clazz.defineMethod (c$, "set",
function (mr) {
  // Attaches the owning reader and returns this helper so the call can be chained.
  this.mr = mr;
  return this;
}, "J.adapter.smarter.AtomSetCollectionReader");
|
| 17 |
+
Clazz.defineMethod (c$, "readAtomsAndBonds",
function (tokens) {
  // tokens is the tokenized COUNTS line: atom count at [3], bond count at [4].
  var atomCount = this.mr.parseIntStr (tokens[3]);
  this.readAtoms (atomCount);
  var bondCount = this.mr.parseIntStr (tokens[4]);
  this.readBonds (bondCount);
  this.readUserData (atomCount);
}, "~A");
|
| 24 |
+
Clazz.defineMethod (c$, "readAtoms",
function (ac) {
  // Reads ac atom lines from the ATOM block and adds each through the owning reader.
  // Token layout used: [2] atom number, [3] symbol, [4..6] x y z, [7..] KEY=value options.
  this.mr.discardLinesUntilContains ("BEGIN ATOM");
  for (var n = 0; n < ac; ++n) {
    this.rd ();
    this.checkLineContinuation ();
    var tokens = this.mr.getTokens ();
    var iAtom = this.mr.parseIntStr (tokens[2]);
    var elementSymbol = tokens[3];
    // "*" entries are not added as atoms.
    if (!elementSymbol.equals ("*")) {
      var x = this.mr.parseFloatStr (tokens[4]);
      var y = this.mr.parseFloatStr (tokens[5]);
      var z = this.mr.parseFloatStr (tokens[6]);
      var charge = 0;
      var isotope = 0;
      for (var j = 7; j < tokens.length; j++) {
        var option = tokens[j].toUpperCase ();
        if (option.startsWith ("CHG=")) {
          charge = this.mr.parseIntAt (tokens[j], 4);
        } else if (option.startsWith ("MASS=")) {
          isotope = this.mr.parseIntAt (tokens[j], 5);
        }
      }
      // Hydrogen masses above 1 are handed on as (1 - mass).
      if (isotope > 1 && elementSymbol.equals ("H")) isotope = 1 - isotope;
      this.mr.addMolAtom (iAtom, isotope, elementSymbol, charge, x, y, z);
    }
  }
  this.mr.discardLinesUntilContains ("END ATOM");
}, "~N");
|
| 49 |
+
Clazz.defineMethod (c$, "readBonds",
function (bondCount) {
  // Reads bondCount bond lines from the BOND block.
  // Token layout used: [3] bond type, [4] first atom, [5] second atom.
  this.mr.discardLinesUntilContains ("BEGIN BOND");
  if (bondCount == 0) this.mr.asc.setNoAutoBond ();
  for (var n = 0; n < bondCount; ++n) {
    this.rd ();
    this.checkLineContinuation ();
    var tokens = this.mr.getTokens ();
    var order = this.mr.parseIntStr (tokens[3]);
    var iAtom1 = tokens[4];
    var iAtom2 = tokens[5];
    var stereo = 0;
    var cfg = this.getField ("CFG");
    if (cfg != null) {
      stereo = this.mr.parseIntStr (cfg);
    } else {
      // With ATTACH=ALL, ENDPTS=(n a1 ... an) also bonds the first atom to every listed endpoint.
      var endpts = this.getField ("ENDPTS");
      if (endpts != null && this.line.indexOf ("ATTACH=ALL") >= 0) {
        tokens = JU.PT.getTokens (endpts);
        var nEndpoints = this.mr.parseIntStr (tokens[0]);
        var fixedOrder = this.mr.fixOrder (order, 0);
        for (var k = 1; k <= nEndpoints; k++) {
          this.mr.asc.addNewBondFromNames (iAtom1, tokens[k], fixedOrder);
        }
      }
    }
    this.mr.addMolBond (iAtom1, iAtom2, order, stereo);
  }
  this.mr.discardLinesUntilContains ("END BOND");
}, "~N");
|
| 76 |
+
Clazz.defineMethod (c$, "readUserData",
function (ac) {
  // Scans the rest of the connection table (up to "END CTAB") for SGROUP
  // blocks and collects lines that carry ATOMS=, FIELDNAME= and FIELDDATA=.
  //   - The first field whose name contains "partial" sets atom partial charges.
  //   - Every other field is gathered per atom and registered as an atom
  //     property with setAtomProperties.
  //   ac - atom count of this model; sizes the per-field arrays
  this.userData = null;
  var pc = null;
  // rd() returns null at end of file; stop there instead of dereferencing it.
  while (this.rd () != null && !this.line.contains ("END CTAB")) {
    if (!this.line.contains ("BEGIN SGROUP")) continue;
    var atoms;
    var name;
    var data;
    while (this.rd () != null && !this.line.contains ("END SGROUP")) {
      if (this.userData == null) this.userData = new java.util.Hashtable ();
      if ((atoms = this.getField ("ATOMS")) == null || (name = this.getField ("FIELDNAME")) == null || (data = this.getField ("FIELDDATA")) == null) continue;
      name = name.toLowerCase ();
      var isPartial = (name.indexOf ("partial") >= 0);
      if (isPartial) {
        if (pc == null) {
          pc = name;
          // Clear charges once, when the partial-charge field is first seen.
          // Clearing on every line would erase the values assigned by the
          // preceding lines of the same field.
          var at = this.mr.asc.atoms;
          for (var i = this.mr.asc.getLastAtomSetAtomIndex (), n = this.mr.asc.ac; i < n; i++) at[i].partialCharge = 0;
        } else if (!pc.equals (name)) {
          // A second, differently named "partial" field is stored as ordinary data.
          isPartial = false;
        }
      }
      var a = null;
      var f = 0;
      if (isPartial) f = this.mr.parseFloatStr (data);
      else if ((a = this.userData.get (name)) == null) this.userData.put (name, a = new Array (ac));
      try {
        // ATOMS=(n a1 ... an): token 0 is the count, so only indices >= 1 are atoms.
        var tokens = JU.PT.getTokens (atoms);
        for (var i = tokens.length; --i >= 1; ) {
          var atom = tokens[i];
          if (isPartial) this.mr.asc.getAtomFromName (atom).partialCharge = f;
          else a[this.mr.parseIntStr (atom) - 1] = data;
        }
      } catch (e) {
        if (Clazz.exceptionOf (e, Exception)) {
          // Best effort: a bad atom reference skips the rest of this line only.
        } else {
          throw e;
        }
      }
    }
    // End of file inside an SGROUP block: do not read any further.
    if (this.line == null) break;
  }
  if (this.userData == null) return;
  // Convert each collected field to floats; atoms with no entry get 0.
  for (var key, $key = this.userData.keySet ().iterator (); $key.hasNext () && ((key = $key.next ()) || true);) {
    var a = this.userData.get (key);
    var f = Clazz.newFloatArray (a.length, 0);
    for (var i = 0; i < a.length; i++) f[i] = (a[i] == null ? 0 : this.mr.parseFloatStr (a[i]));
    this.mr.asc.setAtomProperties (key, f, -1, false);
  }
}, "~N");
|
| 125 |
+
Clazz.defineMethod (c$, "getField",
function (key) {
  // Returns the value of "key=" on the current line, or null if absent.
  // A value opening with '"' runs to the next '"', and one opening with '('
  // runs to the next ')'; the delimiters are not included. Any other value
  // runs to the next space or to the end of the line.
  var pt = this.line.indexOf (key + "=");
  if (pt < 0) return null;
  pt += key.length + 1;
  var term = ' ';
  var first = this.line.charAt (pt);
  if (first === '"') {
    term = '"';
  } else if (first === '(') {
    term = ')';
  } else if (first !== '+') {
    // Plain value: step back so the first character is kept. A leading '+' is dropped.
    pt--;
  }
  var start = pt + 1;
  // Appending the terminator guarantees indexOf finds an end position.
  var end = (this.line + term).indexOf (term, start);
  return this.line.substring (start, end);
}, "~S");
|
| 146 |
+
Clazz.defineMethod (c$, "rd",
function () {
  // Reads the next line through the owning reader, keeps a copy on this helper and returns it.
  var next = this.mr.rd ();
  this.line = next;
  return next;
});
|
| 150 |
+
Clazz.defineMethod (c$, "checkLineContinuation",
function () {
  // While the current line ends with "-", read the next line and append it as is.
  // NOTE(review): the joined text is stored only on this helper's `line`.
  // Callers that then tokenize with this.mr.getTokens() may be reading the
  // owning reader's current line instead -- confirm.
  for (; this.line.endsWith ("-"); ) {
    var head = this.line;
    this.rd ();
    this.line = head + this.line;
  }
});
|
| 158 |
+
});
|
static/j2s/J/adapter/readers/molxyz/XyzReader.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.molxyz");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.AtomSetCollectionReader"], "J.adapter.readers.molxyz.XyzReader", ["java.lang.Float", "JU.Logger"], function () {
|
| 3 |
+
c$ = Clazz.declareType (J.adapter.readers.molxyz, "XyzReader", J.adapter.smarter.AtomSetCollectionReader);
|
| 4 |
+
Clazz.overrideMethod (c$, "checkLine",
function () {
  // Treats the current line as the atom-count line of one XYZ frame and
  // either reads or skips that frame. Always returns false; this.continuing
  // is cleared when reading should stop.
  var modelAtomCount = this.parseIntStr (this.line);
  if (modelAtomCount == -2147483648) {
    // Not an integer: no further frames.
    this.continuing = false;
    return false;
  }
  this.vibrationNumber = ++this.modelNumber;
  var wanted = (this.desiredVibrationNumber <= 0 ? this.doGetModel (this.modelNumber, null) : this.doGetVibration (this.vibrationNumber));
  if (!wanted) {
    this.skipAtomSet (modelAtomCount);
  } else {
    // The line after the count is the frame's comment and becomes the atom-set name.
    this.rd ();
    this.checkCurrentLineForScript ();
    this.asc.newAtomSet ();
    var name = this.line.trim ();
    this.readAtoms (modelAtomCount);
    this.applySymmetryAndSetTrajectory ();
    this.asc.setAtomSetName (name);
    if (this.isLastModel (this.modelNumber)) {
      this.continuing = false;
      return false;
    }
  }
  this.discardLinesUntilNonBlank ();
  return false;
});
|
| 27 |
+
Clazz.overrideMethod (c$, "finalizeSubclassReader",
function () {
  // Clear the trajectory flag, then run the base-class finalization.
  this.isTrajectory = false;
  this.finalizeReaderASCR ();
});
|
| 32 |
+
Clazz.defineMethod (c$, "skipAtomSet",
function (modelAtomCount) {
  // Consumes one frame without storing it: one line, then one line per atom.
  this.rd ();
  var remaining = modelAtomCount;
  while (--remaining >= 0) {
    this.rd ();
  }
}, "~N");
|
| 38 |
+
Clazz.defineMethod (c$, "readAtoms",
function (modelAtomCount) {
  // Reads modelAtomCount atom lines. The token count selects the layout:
  //   4           symbol x y z
  //   5           ... plus charge
  //   6           ... plus charge and radius
  //   8           ... plus charge and a vector taken from tokens 5..7
  //   9           as 8, plus an atom serial in token 8
  //   7, 10+      ... plus a vector taken from tokens 4..6
  // Lines with fewer than 4 tokens are reported and skipped.
  for (var i = 0; i < modelAtomCount; ++i) {
    this.rd ();
    var tokens = this.getTokens ();
    var nTokens = tokens.length;
    if (nTokens < 4) {
      JU.Logger.warn ("line cannot be read for XYZ atom data: " + this.line);
      continue;
    }
    var atom = this.addAtomXYZSymName (tokens, 1, null, null);
    this.setElementAndIsotope (atom, tokens[0]);
    if (nTokens == 4) continue;

    var vpt = 4;
    if (nTokens == 5 || nTokens == 6 || nTokens == 8 || nTokens == 9) {
      // Token 4 is a partial charge if it contains a '.', otherwise a formal charge.
      if (tokens[4].indexOf (".") >= 0) {
        atom.partialCharge = this.parseFloatStr (tokens[4]);
      } else {
        var charge = this.parseIntStr (tokens[4]);
        if (charge != -2147483648) atom.formalCharge = charge;
      }
      if (nTokens == 5) continue;
      if (nTokens == 6) {
        atom.radius = this.parseFloatStr (tokens[5]);
        continue;
      }
      if (nTokens == 9) atom.atomSerial = this.parseIntStr (tokens[8]);
      // The vector follows the charge column.
      vpt++;
    }
    var vx = this.parseFloatStr (tokens[vpt++]);
    var vy = this.parseFloatStr (tokens[vpt++]);
    var vz = this.parseFloatStr (tokens[vpt++]);
    if (Float.isNaN (vx) || Float.isNaN (vy) || Float.isNaN (vz)) continue;
    this.asc.addVibrationVector (atom.index, vx, vy, vz);
  }
}, "~N");
|
| 80 |
+
});
|
static/j2s/J/adapter/readers/more/BinaryDcdReader.js
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.more");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.AtomSetCollectionReader"], "J.adapter.readers.more.BinaryDcdReader", ["java.lang.Boolean", "JU.BS", "$.P3", "$.SB", "JU.Escape", "$.Logger"], function () {
|
| 3 |
+
c$ = Clazz.decorateAsClass (function () {
  // Number of frames, read from the file header.
  this.nModels = 0;
  // Atom count, read from the file header.
  this.nAtoms = 0;
  // nAtoms minus the fixed-atom count from the header.
  this.nFree = 0;
  // Bit set of free atom indices; stays null when the file has no fixed atoms.
  this.bsFree = null;
  // Coordinates from a frame that supplied every atom (see getTrajectoryStep).
  this.xAll = null;
  this.yAll = null;
  this.zAll = null;
  // Header flag: when > 0, each frame starts with a block of doubles.
  this.crystGroup = 0;
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.more, "BinaryDcdReader", J.adapter.smarter.AtomSetCollectionReader);
|
| 14 |
+
Clazz.overrideMethod (c$, "setup",
function (fullPath, htParams, reader) {
  // Mark the reader as binary and as requiring a bitset filter, then run the common setup.
  this.isBinary = true;
  this.requiresBSFilter = true;
  this.setupASCR (fullPath, htParams, reader);
}, "~S,java.util.Map,~O");
|
| 20 |
+
Clazz.overrideMethod (c$, "initializeReader",
function () {
  // Prepare trajectory reading and flag the collection with "ignoreUnitCell".
  this.initializeTrajectoryFile ();
  this.asc.setInfo ("ignoreUnitCell", Boolean.TRUE);
});
|
| 25 |
+
Clazz.overrideMethod (c$, "processBinaryDocument",
function () {
  // Reads the file header, the title records and the atom count, then every frame.
  // Values read and discarded below must still be consumed to keep the stream position.
  var bytes = Clazz.newByteArray (40, 0);
  // The first int is compared with 0x54 and the result handed to setStream.
  // NOTE(review): presumably this selects the byte order -- confirm against setStream.
  this.binaryDoc.setStream (null, this.binaryDoc.readInt () == 0x54);
  this.binaryDoc.readInt ();
  this.nModels = this.binaryDoc.readInt ();
  // Three header ints that are read but not used.
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  var ndegf = this.binaryDoc.readInt ();
  // Provisional value; replaced below once the atom count is known.
  this.nFree = Clazz.doubleToInt (ndegf / 3);
  var nFixed = this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.crystGroup = this.binaryDoc.readInt ();
  this.binaryDoc.readByteArray (bytes, 0, 32);
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();

  // Title section: a count followed by that many 80-character strings.
  var sb = new JU.SB ();
  var nTitles = this.binaryDoc.readInt ();
  for (var i = 0; i < nTitles; i++) {
    sb.append (this.trimString (this.binaryDoc.readString (80))).appendC ('\n');
  }
  this.binaryDoc.readInt ();
  JU.Logger.info ("BinaryDcdReadaer:\n" + sb);

  // Atom-count record: one int on each side of the value.
  this.binaryDoc.readInt ();
  this.nAtoms = this.binaryDoc.readInt ();
  this.binaryDoc.readInt ();
  this.nFree = this.nAtoms - nFixed;
  if (nFixed != 0) {
    // A list of 1-based free-atom indices follows, again framed by one int on each side.
    this.binaryDoc.readInt ();
    this.bsFree = JU.BS.newN (this.nFree);
    for (var i = 0; i < this.nFree; i++) {
      this.bsFree.set (this.binaryDoc.readInt () - 1);
    }
    this.binaryDoc.readInt ();
    JU.Logger.info ("free: " + this.bsFree.cardinality () + " " + JU.Escape.eBS (this.bsFree));
  }
  this.readCoordinates ();
  JU.Logger.info ("Total number of trajectory steps=" + this.trajectorySteps.size ());
});
|
| 65 |
+
Clazz.defineMethod (c$, "trimString",
function (s) {
  // Cut the string at its first NUL character, if any, then trim whitespace.
  var nulAt = s.indexOf ('\0');
  var text = (nulAt >= 0 ? s.substring (0, nulAt) : s);
  return text.trim ();
}, "~S");
|
| 71 |
+
Clazz.defineMethod (c$, "readFloatArray",
function () {
  // Reads one record: an int giving the byte length, length/4 floats, then one trailing int.
  var byteLength = this.binaryDoc.readInt ();
  var count = Clazz.doubleToInt (byteLength / 4);
  var values = Clazz.newFloatArray (count, 0);
  for (var k = 0; k < count; k++) {
    values[k] = this.binaryDoc.readFloat ();
  }
  this.binaryDoc.readInt ();
  return values;
});
|
| 80 |
+
Clazz.defineMethod (c$, "readDoubleArray",
function () {
  // Reads one record: an int giving the byte length, length/8 doubles, then one trailing int.
  var byteLength = this.binaryDoc.readInt ();
  var count = Clazz.doubleToInt (byteLength / 8);
  var values = Clazz.newDoubleArray (count, 0);
  for (var k = 0; k < count; k++) {
    values[k] = this.binaryDoc.readDouble ();
  }
  this.binaryDoc.readInt ();
  return values;
});
|
| 89 |
+
Clazz.defineMethod (c$, "readCoordinates",
function () {
  // Walks all nModels frames, storing the requested ones as trajectory steps
  // and reading past the others. Stops early after the last wanted frame or
  // when a frame cannot be read.
  var ac;
  if (this.bsFilter == null) {
    ac = this.templateAtomCount;
  } else {
    ac = (this.htParams.get ("filteredAtomCount")).intValue ();
  }
  for (var frame = 0; frame < this.nModels; frame++) {
    if (!this.doGetModel (++this.modelNumber, null)) {
      // Unwanted frame: consume its records (optional doubles block, then x, y, z).
      if (this.crystGroup > 0) this.readDoubleArray ();
      this.readFloatArray ();
      this.readFloatArray ();
      this.readFloatArray ();
      continue;
    }
    var trajectoryStep = new Array (ac);
    if (!this.getTrajectoryStep (trajectoryStep)) return;
    this.trajectorySteps.addLast (trajectoryStep);
    if (this.isLastModel (this.modelNumber)) return;
  }
});
|
| 104 |
+
/**
 * Read one frame and fill trajectoryStep with JU.P3 points.
 *
 * The first frame read (this.xAll not yet set), and every frame when
 * this.bsFree is null, supplies a coordinate for each atom; those arrays
 * are then kept in xAll/yAll/zAll. Otherwise only atoms flagged in bsFree
 * are taken from the freshly read arrays and the remaining atoms reuse the
 * coordinates saved in xAll/yAll/zAll.
 *
 * @param trajectoryStep array to fill; only atoms passing this.bsFilter
 *        (when set) are stored, and filling stops once it is full
 * @return true when the frame was read, false when an Exception was caught
 */
Clazz.defineMethod (c$, "getTrajectoryStep",
function (trajectoryStep) {
  try {
    var capacity = trajectoryStep.length;
    var lastFilled = -1;
    if (this.crystGroup > 0) this.calcUnitCell (this.readDoubleArray ());
    var xs = this.readFloatArray ();
    var ys = this.readFloatArray ();
    var zs = this.readFloatArray ();
    var freeAtoms = null;
    if (this.xAll != null) freeAtoms = this.bsFree;
    if (freeAtoms == null) {
      // complete frame: remember it as the source for non-free atoms
      this.xAll = xs;
      this.yAll = ys;
      this.zAll = zs;
    }
    var src = 0;
    for (var iAtom = 0; iAtom < this.nAtoms; iAtom++) {
      var p = new JU.P3 ();
      var isInFrame = (freeAtoms == null || freeAtoms.get (iAtom));
      if (isInFrame) {
        p.set (xs[src], ys[src], zs[src]);
        src++;
      } else {
        p.set (this.xAll[iAtom], this.yAll[iAtom], this.zAll[iAtom]);
      }
      if (this.bsFilter != null && !this.bsFilter.get (iAtom)) continue;
      lastFilled++;
      if (lastFilled == capacity) return true;
      trajectoryStep[lastFilled] = p;
    }
    return true;
  } catch (e) {
    if (!Clazz.exceptionOf (e, Exception)) throw e;
    return false;
  }
}, "~A");
|
| 138 |
+
/**
 * Convert a six-value cell record into [a, b, c, alpha, beta, gamma].
 *
 * Input layout as read by this code: abc[0]=a, abc[2]=b, abc[5]=c, with
 * abc[1], abc[3], abc[4] feeding gamma, beta and alpha respectively. Each
 * of those three is mapped through (pi/2 - asin(v)) and converted from
 * radians to degrees. The result is also printed to System.out.
 *
 * @param abc array of six numbers
 * @return float array [a, b, c, alpha, beta, gamma]
 */
Clazz.defineMethod (c$, "calcUnitCell",
function (abc) {
  var toDegrees = function (v) {
    return (1.5707963267948966 - Math.asin (v)) * 180 / 3.141592653589793;
  };
  var a = abc[0];
  var b = abc[2];
  var c = abc[5];
  var alpha = toDegrees (abc[4]);
  var beta = toDegrees (abc[3]);
  var gamma = toDegrees (abc[1]);
  var lengths = a + " " + b + " " + c;
  var angles = alpha + " " + beta + " " + gamma;
  System.out.println ("unitcell:[" + lengths + " " + angles + "]");
  return Clazz.newFloatArray (-1, [a, b, c, alpha, beta, gamma]);
}, "~A");
|
| 152 |
+
});
|
static/j2s/J/adapter/readers/more/ForceFieldReader.js
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Clazz.declarePackage ("J.adapter.readers.more");
|
| 2 |
+
Clazz.load (["J.adapter.smarter.AtomSetCollectionReader", "java.util.Properties"], "J.adapter.readers.more.ForceFieldReader", ["java.lang.Character", "JU.PT", "J.adapter.smarter.Atom"], function () {
|
| 3 |
+
// Class J.adapter.readers.more.ForceFieldReader, extending
// J.adapter.smarter.AtomSetCollectionReader.
c$ = Clazz.decorateAsClass (function () {
  // atom type -> element symbol cache; the real object is created in the
  // prepareFields callback below
  this.atomTypes = null;
  // user-supplied type mapping string, set by setUserAtomTypes()
  this.userAtomTypes = null;
  Clazz.instantialize (this, arguments);
}, J.adapter.readers.more, "ForceFieldReader", J.adapter.smarter.AtomSetCollectionReader);
// Field initializer: gives each instance its own cache.
Clazz.prepareFields (c$, function () {
  this.atomTypes = new java.util.Properties ();
});
|
| 11 |
+
/**
 * Load the "atomTypes" entry of htParams into this.userAtomTypes.
 *
 * A non-null value is wrapped in leading and trailing ";" so that
 * getElementSymbol() can search it for ";type=>" and always find a
 * terminating ";".
 */
Clazz.defineMethod (c$, "setUserAtomTypes",
function () {
  var spec = this.htParams.get ("atomTypes");
  this.userAtomTypes = (spec == null ? spec : ";" + spec + ";");
});
|
| 16 |
+
/**
 * Work out the element symbol for a force-field atom type and store it in
 * atom.elementSymbol.
 *
 * Lookup order:
 *   1. the per-reader cache this.atomTypes;
 *   2. the user mapping this.userAtomTypes (";type=>Symbol;");
 *   3. a one-character type, upper-cased;
 *   4. heuristics driven by the class tables specialTypes, ffTypes,
 *      secondCharOnly and twoChar.
 *
 * Every table lookup uses the space-delimited form " type " so that a type
 * can only match a whole table entry, never a fragment of one (an
 * undelimited search made e.g. "yt" match the entry "ayt").
 *
 * @param atom     atom to update; atom.atomName is consulted for the
 *                 "IM"/"IP" special types
 * @param atomType force-field atom type
 * @return true when the symbol is considered reliable (it is then cached);
 *         false when it is only a guess built from the first two
 *         characters, or when atomType is empty (atom is left untouched)
 */
Clazz.defineMethod (c$, "getElementSymbol",
function (atom, atomType) {
  // the tables are class statics; use them instead of repeating the literals
  var tables = J.adapter.readers.more.ForceFieldReader;
  var elementSymbol = this.atomTypes.get (atomType);
  if (elementSymbol != null) {
    atom.elementSymbol = elementSymbol;
    return true;
  }
  var nChar = atomType.length;
  var haveSymbol = (nChar < 2);
  var ptType;
  if (this.userAtomTypes != null && (ptType = this.userAtomTypes.indexOf (";" + atomType + "=>")) >= 0) {
    // skip past ";", the type itself and "=>" to reach the symbol
    ptType += nChar + 3;
    elementSymbol = this.userAtomTypes.substring (ptType, this.userAtomTypes.indexOf (";", ptType)).trim ();
    haveSymbol = true;
  } else if (nChar == 1) {
    elementSymbol = atomType.toUpperCase ();
    haveSymbol = true;
  } else if (nChar == 0) {
    // nothing to deduce from; do not cache or assign a bogus symbol
    return false;
  } else {
    var ch0 = atomType.charAt (0);
    var ch1 = atomType.charAt (1);
    var isXx = (JU.PT.isUpperCase (ch0) && JU.PT.isLowerCase (ch1));
    var check = " " + atomType + " ";
    if (tables.specialTypes.indexOf (check) >= 0) {
      if (ch0 == 'I') {
        // take the symbol from the atom name: two characters when the
        // second one is lower case, otherwise just the first
        elementSymbol = atom.atomName.substring (0, 2);
        if (!JU.PT.isLowerCase (elementSymbol.charAt (1))) elementSymbol = elementSymbol.substring (0, 1);
      } else {
        elementSymbol = (ch0 == 's' ? "Si" : "Al");
      }
    } else if (nChar == 2 && isXx) {
      // left null on purpose: handled by the fallback below, which does
      // not mark the symbol as reliable
    } else if (JU.PT.isLetter (ch0) && !JU.PT.isLetter (ch1)) {
      elementSymbol = "" + Character.toUpperCase (ch0);
    } else if (nChar > 2 && isXx && !JU.PT.isLetter (atomType.charAt (2))) {
      elementSymbol = "" + ch0 + ch1;
    } else {
      ch0 = Character.toUpperCase (ch0);
      if (tables.ffTypes.indexOf (check) >= 0) {
        if (tables.secondCharOnly.indexOf (check) >= 0) {
          elementSymbol = "" + ch1;
        } else if (tables.twoChar.indexOf (check) >= 0) {
          elementSymbol = "" + ch0 + ch1;
        } else {
          elementSymbol = "" + ch0;
        }
      }
    }
    if (elementSymbol == null) {
      // fallback guess: first character plus lower-cased second character
      elementSymbol = "" + ch0 + Character.toLowerCase (ch1);
    } else {
      haveSymbol = true;
    }
  }
  atom.elementSymbol = elementSymbol;
  if (haveSymbol) this.atomTypes.put (atomType, elementSymbol);
  return haveSymbol;
}, "J.adapter.smarter.Atom,~S");
|
| 65 |
+
/**
 * Static helper: guess an element symbol from a PDB-style atom name.
 *
 * XX may carry an atom type after a NUL character; a one-character type is
 * returned as is. Otherwise the first character greater than '9' (ch1) and
 * the character after it (ch2) are examined, together with group3, in the
 * order coded below. "Xx" is returned when nothing fits.
 *
 * @param isHetero when true, a valid two-character symbol ch1+ch2 is accepted
 * @param XX       atom name, optionally followed by NUL and an atom type
 * @param group3   group name the atom belongs to
 * @return the deduced symbol, or "Xx"
 */
c$.deducePdbElementSymbol = Clazz.defineMethod (c$, "deducePdbElementSymbol",
function (isHetero, XX, group3) {
  var nulIndex = XX.indexOf ('\0');
  if (nulIndex >= 0) {
    var atomType = XX.substring (nulIndex + 1);
    XX = XX.substring (0, nulIndex);
    if (atomType != null && atomType.length == 1) return atomType;
  }
  if (XX.equalsIgnoreCase (group3)) return XX;
  var len = XX.length;
  var ch1 = ' ';
  var pos = 0;
  // advance past leading characters that sort at or below '9'; ch1 ends up
  // as the last character examined
  while (pos < len) {
    ch1 = XX.charAt (pos++);
    if (ch1 > '9') break;
  }
  var ch2 = (pos < len ? XX.charAt (pos) : ' ');
  var full = group3 + "." + ch1 + ch2;
  if (("OEC.CA ICA.CA OC1.CA OC2.CA OC4.CA").indexOf (full) >= 0) return "Ca";
  // cases where only the first letter is used
  if (XX.indexOf ("'") > 0 || XX.indexOf ("*") >= 0) return "" + ch1;
  if ("HCNO".indexOf (ch1) >= 0 && ch2 <= 'H') return "" + ch1;
  if (XX.startsWith ("CM")) return "" + ch1;
  var Atom = J.adapter.smarter.Atom;
  if (isHetero && Atom.isValidSymNoCase (ch1, ch2)) return ("" + ch1 + ch2).trim ();
  if (Atom.isValidSym1 (ch1)) return "" + ch1;
  if (Atom.isValidSym1 (ch2)) return "" + ch2;
  return "Xx";
}, "~B,~S,~S");
|
| 88 |
+
// Lookup tables used by getElementSymbol(). Each table is one string of
// space-separated atom types with a leading and a trailing space, so that a
// type can be searched for as " type ".
Clazz.defineStatics (c$,
  // all recognized atom types (split over several literals only for
  // readability; the concatenation is a single space-separated string)
  "ffTypes",
    " CA CB CC CD CE CF CG CH CI CJ CK CM CN CP CQ CR CT CV CW HA HP HC HO HS HW LP NA NB NC NT OH OS OW SH AH BH HT HY AC BC CS OA OB OE OT " +
    "dw hc hi hn ho hp hs hw hscp htip ca cg ci cn co coh cp cr cs ct c3h c3m c4h c4m na nb nh nho nh+ ni nn np npc nr nt nz oc oe oh op oscp otip sc sh sp br cl ca+ ar si lp nu sz oz az pz ga ge tioc titd li+ na+ rb+ cs+ mg2+ ca2+ ba2+ cu2+ cl- br- so4 sy oy ay ayt nac+ mg2c fe2c mn4c mn3c co2c ni2c lic+ pd2+ ti4c sr2c ca2c cly- hocl py vy nh4+ so4y lioh naoh koh foh cloh beoh al " +
    "CE1 CF1 CF2 CF3 CG CD2 CH1E CH2E CH3E CM CP3 CPH1 CPH2 CQ66 CR55 CR56 CR66 CS66 CT CT3 CT4 CUA1 CUA2 CUA3 CUY1 CUY2 HA HC HMU HO HT LP NC NC2 NO2 NP NR1 NR2 NR3 NR55 NR56 NR66 NT NX OA OAC OC OE OH2 OK OM OS OSH OSI OT OW PO3 PO4 PT PUA1 PUY1 SE SH1E SK SO1 SO2 SO3 SO4 ST ST2 ST2 " +
    "br br- br1 cl cl- cl1 cl12 cl13 cl14 cl1p ca+ cu+2 fe+2 mg+2 zn+2 cs+ li+ na+ rb+ al4z si si4 si4c si4z ar he kr ne xe " +
    "dw hi hw ca cg ci co coh cp cr cs ct ct3 na nb nh nho ni no np nt nt2 nz oa oc oh op os ot sp bt cl\' si4l si5l si5t si6 si6o si\' " +
    "br ca cc cd ce cf cl cp cq cu cv cx cy ha hc hn ho hp hs na nb nc nd nh oh os pb pc pd pe pf px py sh ss sx sy hn2 ho2 cz oo oz si sio hsi osi ",
  // types whose symbol is built from the first two characters
  "twoChar",
    " al al4z ar ba2+ beoh br br- br1 ca+ ca2+ ca2c cl cl' cl- cl1 cl12 cl13 cl14 cl1p cloh cly- co2c cs+ cu+2 cu2+ fe+2 fe2c ga ge he kr li+ lic+ lioh lp LP mg+2 mg2+ mg2c mn3c mn4c na+ nac+ naoh ne ni2c nu pd2+ rb+ si si' si4 si4c si4l si4z si5l si5t si6 si6o sio sr2c ti4c tioc titd xe zn+2 ",
  // types resolved from the atom name ("I...") or mapped to Si/Al
  "specialTypes",
    " IM IP sz az sy ay ayt ",
  // types whose symbol is the second character alone
  "secondCharOnly",
    " AH BH AC BC ");
|
| 93 |
+
});
|