Initial upload
Browse files- .gitignore +163 -6
- .vscode/launch.json +29 -0
- LICENSE +202 -0
- NOTICE.txt +16 -0
- README.md +90 -0
- configs/train_split1_full_MSFF_DTCL.yaml +43 -0
- configs/train_split2_full_MSFF_DTCL.yaml +43 -0
- configs/train_split3_full_MSFF_DTCL.yaml +43 -0
- eval.py +585 -0
- evaluation/classificationMAP.py +26 -0
- evaluation/detectionMAP.py +516 -0
- evaluation/eval.py +129 -0
- evaluation/utils.py +57 -0
- feeders/__init__.py +1 -0
- feeders/feeder.py +313 -0
- feeders/tools.py +234 -0
- graph/__init__.py +1 -0
- graph/kinetics.py +76 -0
- graph/ntu_rgb_d.py +69 -0
- graph/tools.py +113 -0
- huggingface.py +9 -0
- human_model/Put SMPLH model here.txt +0 -0
- model/__init__.py +7 -0
- model/agcn.py +278 -0
- model/losses.py +63 -0
- prepare/configs/action_label_split1.json +6 -0
- prepare/configs/action_label_split2.json +6 -0
- prepare/configs/action_label_split3.json +6 -0
- prepare/create_dataset.py +370 -0
- prepare/dutils.py +310 -0
- prepare/generate_dataset.sh +5 -0
- prepare/preprocess.py +94 -0
- prepare/rotation.py +91 -0
- prepare/split_dataset.py +143 -0
- prepare/viz.py +447 -0
- pyproject.toml +13 -0
- requirements.txt +28 -0
- train.py +830 -0
- train_full.py +788 -0
- train_full_SSL.py +784 -0
- train_full_SSL_Unet.py +813 -0
- utils/__init__.py +0 -0
- utils/logger.py +135 -0
.gitignore
CHANGED
|
@@ -1,6 +1,163 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dataset
|
| 2 |
+
data
|
| 3 |
+
dataset
|
| 4 |
+
work_dir
|
| 5 |
+
|
| 6 |
+
*.pkl
|
| 7 |
+
*.mp4
|
| 8 |
+
|
| 9 |
+
*.sh.o*
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Byte-compiled / optimized / DLL files
|
| 13 |
+
__pycache__/
|
| 14 |
+
*.py[cod]
|
| 15 |
+
*$py.class
|
| 16 |
+
|
| 17 |
+
# C extensions
|
| 18 |
+
*.so
|
| 19 |
+
|
| 20 |
+
# Distribution / packaging
|
| 21 |
+
.Python
|
| 22 |
+
build/
|
| 23 |
+
develop-eggs/
|
| 24 |
+
dist/
|
| 25 |
+
downloads/
|
| 26 |
+
eggs/
|
| 27 |
+
.eggs/
|
| 28 |
+
lib/
|
| 29 |
+
lib64/
|
| 30 |
+
parts/
|
| 31 |
+
sdist/
|
| 32 |
+
var/
|
| 33 |
+
wheels/
|
| 34 |
+
share/python-wheels/
|
| 35 |
+
*.egg-info/
|
| 36 |
+
.installed.cfg
|
| 37 |
+
*.egg
|
| 38 |
+
MANIFEST
|
| 39 |
+
|
| 40 |
+
# PyInstaller
|
| 41 |
+
# Usually these files are written by a python script from a template
|
| 42 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 43 |
+
*.manifest
|
| 44 |
+
*.spec
|
| 45 |
+
|
| 46 |
+
# Installer logs
|
| 47 |
+
pip-log.txt
|
| 48 |
+
pip-delete-this-directory.txt
|
| 49 |
+
|
| 50 |
+
# Unit test / coverage reports
|
| 51 |
+
htmlcov/
|
| 52 |
+
.tox/
|
| 53 |
+
.nox/
|
| 54 |
+
.coverage
|
| 55 |
+
.coverage.*
|
| 56 |
+
.cache
|
| 57 |
+
nosetests.xml
|
| 58 |
+
coverage.xml
|
| 59 |
+
*.cover
|
| 60 |
+
*.py,cover
|
| 61 |
+
.hypothesis/
|
| 62 |
+
.pytest_cache/
|
| 63 |
+
cover/
|
| 64 |
+
|
| 65 |
+
# Translations
|
| 66 |
+
*.mo
|
| 67 |
+
*.pot
|
| 68 |
+
|
| 69 |
+
# Django stuff:
|
| 70 |
+
*.log
|
| 71 |
+
local_settings.py
|
| 72 |
+
db.sqlite3
|
| 73 |
+
db.sqlite3-journal
|
| 74 |
+
|
| 75 |
+
# Flask stuff:
|
| 76 |
+
instance/
|
| 77 |
+
.webassets-cache
|
| 78 |
+
|
| 79 |
+
# Scrapy stuff:
|
| 80 |
+
.scrapy
|
| 81 |
+
|
| 82 |
+
# Sphinx documentation
|
| 83 |
+
docs/_build/
|
| 84 |
+
|
| 85 |
+
# PyBuilder
|
| 86 |
+
.pybuilder/
|
| 87 |
+
target/
|
| 88 |
+
|
| 89 |
+
# Jupyter Notebook
|
| 90 |
+
.ipynb_checkpoints
|
| 91 |
+
|
| 92 |
+
# IPython
|
| 93 |
+
profile_default/
|
| 94 |
+
ipython_config.py
|
| 95 |
+
|
| 96 |
+
# pyenv
|
| 97 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 98 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 99 |
+
# .python-version
|
| 100 |
+
|
| 101 |
+
# pipenv
|
| 102 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 103 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 104 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 105 |
+
# install all needed dependencies.
|
| 106 |
+
#Pipfile.lock
|
| 107 |
+
|
| 108 |
+
# poetry
|
| 109 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 110 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 111 |
+
# commonly ignored for libraries.
|
| 112 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 113 |
+
#poetry.lock
|
| 114 |
+
|
| 115 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
| 116 |
+
__pypackages__/
|
| 117 |
+
|
| 118 |
+
# Celery stuff
|
| 119 |
+
celerybeat-schedule
|
| 120 |
+
celerybeat.pid
|
| 121 |
+
|
| 122 |
+
# SageMath parsed files
|
| 123 |
+
*.sage.py
|
| 124 |
+
|
| 125 |
+
# Environments
|
| 126 |
+
.env
|
| 127 |
+
.venv
|
| 128 |
+
env/
|
| 129 |
+
venv/
|
| 130 |
+
ENV/
|
| 131 |
+
env.bak/
|
| 132 |
+
venv.bak/
|
| 133 |
+
|
| 134 |
+
# Spyder project settings
|
| 135 |
+
.spyderproject
|
| 136 |
+
.spyproject
|
| 137 |
+
|
| 138 |
+
# Rope project settings
|
| 139 |
+
.ropeproject
|
| 140 |
+
|
| 141 |
+
# mkdocs documentation
|
| 142 |
+
/site
|
| 143 |
+
|
| 144 |
+
# mypy
|
| 145 |
+
.mypy_cache/
|
| 146 |
+
.dmypy.json
|
| 147 |
+
dmypy.json
|
| 148 |
+
|
| 149 |
+
# Pyre type checker
|
| 150 |
+
.pyre/
|
| 151 |
+
|
| 152 |
+
# pytype static type analyzer
|
| 153 |
+
.pytype/
|
| 154 |
+
|
| 155 |
+
# Cython debug symbols
|
| 156 |
+
cython_debug/
|
| 157 |
+
|
| 158 |
+
# PyCharm
|
| 159 |
+
# JetBrains-specific template is maintained in a separate JetBrains.gitignore that can
|
| 160 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 161 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 162 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 163 |
+
#.idea/
|
.vscode/launch.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "0.2.0",
|
| 3 |
+
"configurations": [
|
| 4 |
+
{
|
| 5 |
+
"name": "Python: Debug Current File (gvhmr)",
|
| 6 |
+
"type": "debugpy",
|
| 7 |
+
"request": "launch",
|
| 8 |
+
"program": "${file}",
|
| 9 |
+
"console": "integratedTerminal",
|
| 10 |
+
"cwd": "/root/autodl-tmp/workshop2/",
|
| 11 |
+
"args": [
|
| 12 |
+
"--config",
|
| 13 |
+
// "config/eval.yaml"
|
| 14 |
+
"configss/train_split3_full_MSFF_DTCL.yaml",
|
| 15 |
+
// "config/pretrain2.yaml"
|
| 16 |
+
"--work-dir",
|
| 17 |
+
"./work_dir/3_agcn_cl_all/",
|
| 18 |
+
"-model_saved_name",
|
| 19 |
+
"./work_dir/3_agcn_cl_all/",
|
| 20 |
+
"--weights",
|
| 21 |
+
"/root/autodl-tmp/RVTCLR/work_dir/ntu60_cs/skeletonclr_joint/U/cl/pretext_babel/epoch300_model.pt"
|
| 22 |
+
]
|
| 23 |
+
}
|
| 24 |
+
]
|
| 25 |
+
}
|
| 26 |
+
// CUBLAS_WORKSPACE_CONFIG=:4096:8 python train_full_SSL.py --config configss/train_split3_full_MSFF_DTCL.yaml \
|
| 27 |
+
// --work-dir ./work_dir/3_agcn_cl_all/ \
|
| 28 |
+
// -model_saved_name ./work_dir/3_agcn_cl_all/ \
|
| 29 |
+
// --weights /root/autodl-tmp/RVTCLR/work_dir/ntu60_cs/skeletonclr_joint/U/both/pretext_babel_dense_U/epoch300_model.pt
|
LICENSE
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Apache License
|
| 3 |
+
Version 2.0, January 2004
|
| 4 |
+
http://www.apache.org/licenses/
|
| 5 |
+
|
| 6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 7 |
+
|
| 8 |
+
1. Definitions.
|
| 9 |
+
|
| 10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 12 |
+
|
| 13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 14 |
+
the copyright owner that is granting the License.
|
| 15 |
+
|
| 16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 17 |
+
other entities that control, are controlled by, or are under common
|
| 18 |
+
control with that entity. For the purposes of this definition,
|
| 19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 20 |
+
direction or management of such entity, whether by contract or
|
| 21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 23 |
+
|
| 24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 25 |
+
exercising permissions granted by this License.
|
| 26 |
+
|
| 27 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 28 |
+
including but not limited to software source code, documentation
|
| 29 |
+
source, and configuration files.
|
| 30 |
+
|
| 31 |
+
"Object" form shall mean any form resulting from mechanical
|
| 32 |
+
transformation or translation of a Source form, including but
|
| 33 |
+
not limited to compiled object code, generated documentation,
|
| 34 |
+
and conversions to other media types.
|
| 35 |
+
|
| 36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 37 |
+
Object form, made available under the License, as indicated by a
|
| 38 |
+
copyright notice that is included in or attached to the work
|
| 39 |
+
(an example is provided in the Appendix below).
|
| 40 |
+
|
| 41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 42 |
+
form, that is based on (or derived from) the Work and for which the
|
| 43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 45 |
+
of this License, Derivative Works shall not include works that remain
|
| 46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 47 |
+
the Work and Derivative Works thereof.
|
| 48 |
+
|
| 49 |
+
"Contribution" shall mean any work of authorship, including
|
| 50 |
+
the original version of the Work and any modifications or additions
|
| 51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 55 |
+
means any form of electronic, verbal, or written communication sent
|
| 56 |
+
to the Licensor or its representatives, including but not limited to
|
| 57 |
+
communication on electronic mailing lists, source code control systems,
|
| 58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 60 |
+
excluding communication that is conspicuously marked or otherwise
|
| 61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 62 |
+
|
| 63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 65 |
+
subsequently incorporated within the Work.
|
| 66 |
+
|
| 67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 72 |
+
Work and such Derivative Works in Source or Object form.
|
| 73 |
+
|
| 74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 77 |
+
(except as stated in this section) patent license to make, have made,
|
| 78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 79 |
+
where such license applies only to those patent claims licensable
|
| 80 |
+
by such Contributor that are necessarily infringed by their
|
| 81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 83 |
+
institute patent litigation against any entity (including a
|
| 84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 85 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 86 |
+
or contributory patent infringement, then any patent licenses
|
| 87 |
+
granted to You under this License for that Work shall terminate
|
| 88 |
+
as of the date such litigation is filed.
|
| 89 |
+
|
| 90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 91 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 92 |
+
modifications, and in Source or Object form, provided that You
|
| 93 |
+
meet the following conditions:
|
| 94 |
+
|
| 95 |
+
(a) You must give any other recipients of the Work or
|
| 96 |
+
Derivative Works a copy of this License; and
|
| 97 |
+
|
| 98 |
+
(b) You must cause any modified files to carry prominent notices
|
| 99 |
+
stating that You changed the files; and
|
| 100 |
+
|
| 101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 102 |
+
that You distribute, all copyright, patent, trademark, and
|
| 103 |
+
attribution notices from the Source form of the Work,
|
| 104 |
+
excluding those notices that do not pertain to any part of
|
| 105 |
+
the Derivative Works; and
|
| 106 |
+
|
| 107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 108 |
+
distribution, then any Derivative Works that You distribute must
|
| 109 |
+
include a readable copy of the attribution notices contained
|
| 110 |
+
within such NOTICE file, excluding those notices that do not
|
| 111 |
+
pertain to any part of the Derivative Works, in at least one
|
| 112 |
+
of the following places: within a NOTICE text file distributed
|
| 113 |
+
as part of the Derivative Works; within the Source form or
|
| 114 |
+
documentation, if provided along with the Derivative Works; or,
|
| 115 |
+
within a display generated by the Derivative Works, if and
|
| 116 |
+
wherever such third-party notices normally appear. The contents
|
| 117 |
+
of the NOTICE file are for informational purposes only and
|
| 118 |
+
do not modify the License. You may add Your own attribution
|
| 119 |
+
notices within Derivative Works that You distribute, alongside
|
| 120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 121 |
+
that such additional attribution notices cannot be construed
|
| 122 |
+
as modifying the License.
|
| 123 |
+
|
| 124 |
+
You may add Your own copyright statement to Your modifications and
|
| 125 |
+
may provide additional or different license terms and conditions
|
| 126 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 127 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 128 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 129 |
+
the conditions stated in this License.
|
| 130 |
+
|
| 131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 133 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 134 |
+
this License, without any additional terms or conditions.
|
| 135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 136 |
+
the terms of any separate license agreement you may have executed
|
| 137 |
+
with Licensor regarding such Contributions.
|
| 138 |
+
|
| 139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 141 |
+
except as required for reasonable and customary use in describing the
|
| 142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 143 |
+
|
| 144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 145 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 148 |
+
implied, including, without limitation, any warranties or conditions
|
| 149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 151 |
+
appropriateness of using or redistributing the Work and assume any
|
| 152 |
+
risks associated with Your exercise of permissions under this License.
|
| 153 |
+
|
| 154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 155 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 156 |
+
unless required by applicable law (such as deliberate and grossly
|
| 157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 158 |
+
liable to You for damages, including any direct, indirect, special,
|
| 159 |
+
incidental, or consequential damages of any character arising as a
|
| 160 |
+
result of this License or out of the use or inability to use the
|
| 161 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 162 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 163 |
+
other commercial damages or losses), even if such Contributor
|
| 164 |
+
has been advised of the possibility of such damages.
|
| 165 |
+
|
| 166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 169 |
+
or other liability obligations and/or rights consistent with this
|
| 170 |
+
License. However, in accepting such obligations, You may act only
|
| 171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 172 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 173 |
+
defend, and hold each Contributor harmless for any liability
|
| 174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 175 |
+
of your accepting any such warranty or additional liability.
|
| 176 |
+
|
| 177 |
+
END OF TERMS AND CONDITIONS
|
| 178 |
+
|
| 179 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 180 |
+
|
| 181 |
+
To apply the Apache License to your work, attach the following
|
| 182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 183 |
+
replaced with your own identifying information. (Don't include
|
| 184 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 185 |
+
comment syntax for the file format. We also recommend that a
|
| 186 |
+
file or class name and description of purpose be included on the
|
| 187 |
+
same "printed page" as the copyright notice for easier
|
| 188 |
+
identification within third-party archives.
|
| 189 |
+
|
| 190 |
+
Copyright [yyyy] [name of copyright owner]
|
| 191 |
+
|
| 192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 193 |
+
you may not use this file except in compliance with the License.
|
| 194 |
+
You may obtain a copy of the License at
|
| 195 |
+
|
| 196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 197 |
+
|
| 198 |
+
Unless required by applicable law or agreed to in writing, software
|
| 199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 201 |
+
See the License for the specific language governing permissions and
|
| 202 |
+
limitations under the License.
|
NOTICE.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Open Source Software Notice ###
|
| 2 |
+
|
| 3 |
+
This product from LINE Corporation contains the open source software or third-party software listed below, to which the terms in the LICENSE file in this repository do not apply.
|
| 4 |
+
Please refer to the licences of the respective software repositories for the terms and conditions of their use.
|
| 5 |
+
|
| 6 |
+
BABEL (Non-commercial License)
|
| 7 |
+
https://github.com/abhinanda-punnakkal/BABEL
|
| 8 |
+
|
| 9 |
+
2s-AGCN (CC BY-NC 4.0 License)
|
| 10 |
+
https://github.com/lshiwjx/2s-AGCN
|
| 11 |
+
|
| 12 |
+
FAC-Net (MIT License)
|
| 13 |
+
https://github.com/LeonHLJ/FAC-Net
|
| 14 |
+
|
| 15 |
+
pytorch-classification (MIT License)
|
| 16 |
+
https://github.com/bearpaw/pytorch-classification
|
README.md
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Skeleton-Temporal-Action-Localization
|
| 2 |
+
|
| 3 |
+
Code for the paper "Frame-Level Label Refinement for Skeleton-Based Weakly-Supervised Action Recognition" (AAAI 2023).
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
Architecture of Network
|
| 8 |
+
|
| 9 |
+

|
| 10 |
+
|
| 11 |
+
## Requirements
|
| 12 |
+
```bash
|
| 13 |
+
conda create -n stal python=3.7
|
| 14 |
+
conda activate stal
|
| 15 |
+
conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 -c pytorch
|
| 16 |
+
pip install -r requirements.txt
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
## Data Preparation
|
| 20 |
+
Due to the distribution policy of the AMASS dataset, we are not allowed to distribute the data directly. We provide a series of scripts that can reproduce our motion segmentation dataset from the BABEL dataset.
|
| 21 |
+
|
| 22 |
+
Download [AMASS Dataset](https://amass.is.tue.mpg.de/) and [BABEL Dataset](https://babel.is.tue.mpg.de/). Unzip and locate them in the `dataset` folder.
|
| 23 |
+
|
| 24 |
+
Prepare the SMPLH Model following [this](https://github.com/vchoutas/smplx/blob/main/tools/README.md#smpl-h-version-used-in-amass) and put the merged model `SMPLH_male.pkl` into the `human_model` folder.
|
| 25 |
+
|
| 26 |
+
The whole directory should look like this:
|
| 27 |
+
```
|
| 28 |
+
Skeleton-Temporal-Action-Localization
|
| 29 |
+
│ README.md
|
| 30 |
+
│ train.py
|
| 31 |
+
| ...
|
| 32 |
+
|
|
| 33 |
+
└───config
|
| 34 |
+
└───prepare
|
| 35 |
+
└───...
|
| 36 |
+
│
|
| 37 |
+
└───human_model
|
| 38 |
+
│ └───SMPLH_male.pkl
|
| 39 |
+
│
|
| 40 |
+
└───dataset
|
| 41 |
+
└───amass
|
| 42 |
+
| └───ACCAD
|
| 43 |
+
| └───BMLmovi
|
| 44 |
+
| └───...
|
| 45 |
+
│
|
| 46 |
+
└───babel_v1.0_release
|
| 47 |
+
└───train.json
|
| 48 |
+
└───val.json
|
| 49 |
+
└───...
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Also clone the official BABEL code into the `dataset` folder.
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
git clone https://github.com/abhinanda-punnakkal/BABEL.git dataset/BABEL
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
Finally, the motion segmentation dataset can be generated by:
|
| 59 |
+
```bash
|
| 60 |
+
bash prepare/generate_dataset.sh
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Training and Evaluation
|
| 64 |
+
To train and evaluate the model with subset-1 of BABEL, run the following commands:
|
| 65 |
+
```bash
|
| 66 |
+
python train.py --config config/train_split1.yaml
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
## Acknowledgement
|
| 70 |
+
Our codes are based on [BABEL](https://github.com/abhinanda-punnakkal/BABEL), [2s-AGCN](https://github.com/lshiwjx/2s-AGCN) and [FAC-Net](https://github.com/LeonHLJ/FAC-Net).
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
## Citation
|
| 74 |
+
|
| 75 |
+
```
|
| 76 |
+
@InProceedings{yu2023frame,
|
| 77 |
+
title={Frame-Level Label Refinement for Skeleton-Based Weakly-Supervised Action Recognition},
|
| 78 |
+
author={Yu, Qing and Fujiwara, Kent},
|
| 79 |
+
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
|
| 80 |
+
volume={37},
|
| 81 |
+
number={3},
|
| 82 |
+
pages={3322--3330},
|
| 83 |
+
year={2023}
|
| 84 |
+
}
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## License
|
| 88 |
+
[Apache License 2.0](LICENSE)
|
| 89 |
+
|
| 90 |
+
Additionally, this repository contains third-party software. Refer [NOTICE.txt](NOTICE.txt) for more details and follow the terms and conditions of their use.
|
configs/train_split1_full_MSFF_DTCL.yaml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
work_dir: ./work_dir/1_w_MSFF_full/
|
| 2 |
+
model_saved_name: ./work_dir/1_w_MSFF_full/
|
| 3 |
+
|
| 4 |
+
# feeder
|
| 5 |
+
feeder: feeders.feeder.Feeder
|
| 6 |
+
train_feeder_args:
|
| 7 |
+
data_path: ./dataset/train_split1.pkl
|
| 8 |
+
debug: False
|
| 9 |
+
random_choose: False
|
| 10 |
+
random_shift: False
|
| 11 |
+
random_move: False
|
| 12 |
+
window_size: -1
|
| 13 |
+
nb_class: 4
|
| 14 |
+
|
| 15 |
+
test_feeder_args:
|
| 16 |
+
data_path: ./dataset/val_split1.pkl
|
| 17 |
+
nb_class: 4
|
| 18 |
+
|
| 19 |
+
# model
|
| 20 |
+
model: model.agcn_Unet.Model
|
| 21 |
+
model_args:
|
| 22 |
+
num_class: 4
|
| 23 |
+
num_person: 1
|
| 24 |
+
num_point: 25 # checked from 25
|
| 25 |
+
graph: graph.ntu_rgb_d.Graph
|
| 26 |
+
graph_args:
|
| 27 |
+
labeling_mode: 'spatial'
|
| 28 |
+
|
| 29 |
+
#optim
|
| 30 |
+
weight_decay: 0.001
|
| 31 |
+
base_lr: 0.0002
|
| 32 |
+
step: [60,80]
|
| 33 |
+
|
| 34 |
+
# training
|
| 35 |
+
device: [0]
|
| 36 |
+
optimizer: 'Adam'
|
| 37 |
+
loss: 'CE'
|
| 38 |
+
batch_size: 8
|
| 39 |
+
test_batch_size: 1
|
| 40 |
+
num_epoch: 101 #101
|
| 41 |
+
nesterov: True
|
| 42 |
+
lambda_mil: 1.0
|
| 43 |
+
weights: /home/newDisk/epoch300_model.pt
|
configs/train_split2_full_MSFF_DTCL.yaml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
work_dir: ./work_dir/2_w_MSFF_full/
|
| 2 |
+
model_saved_name: ./work_dir/2_w_MSFF_full/
|
| 3 |
+
|
| 4 |
+
# feeder
|
| 5 |
+
feeder: feeders.feeder.Feeder
|
| 6 |
+
train_feeder_args:
|
| 7 |
+
data_path: ./dataset/train_split2.pkl
|
| 8 |
+
debug: False
|
| 9 |
+
random_choose: False
|
| 10 |
+
random_shift: False
|
| 11 |
+
random_move: False
|
| 12 |
+
window_size: -1
|
| 13 |
+
nb_class: 4
|
| 14 |
+
|
| 15 |
+
test_feeder_args:
|
| 16 |
+
data_path: ./dataset/val_split2.pkl
|
| 17 |
+
nb_class: 4
|
| 18 |
+
|
| 19 |
+
# model
|
| 20 |
+
model: model.agcn_Unet.Model
|
| 21 |
+
model_args:
|
| 22 |
+
num_class: 4
|
| 23 |
+
num_person: 1
|
| 24 |
+
num_point: 25 # checked from 25
|
| 25 |
+
graph: graph.ntu_rgb_d.Graph
|
| 26 |
+
graph_args:
|
| 27 |
+
labeling_mode: 'spatial'
|
| 28 |
+
|
| 29 |
+
#optim
|
| 30 |
+
weight_decay: 0.0001
|
| 31 |
+
base_lr: 0.001
|
| 32 |
+
step: [60,80]
|
| 33 |
+
|
| 34 |
+
# training
|
| 35 |
+
device: [0]
|
| 36 |
+
optimizer: 'Adam'
|
| 37 |
+
loss: 'CE'
|
| 38 |
+
batch_size: 8
|
| 39 |
+
test_batch_size: 1
|
| 40 |
+
num_epoch: 101 #101
|
| 41 |
+
nesterov: True
|
| 42 |
+
lambda_mil: 1.0
|
| 43 |
+
weights: /home/newDisk/epoch300_model.pt
|
configs/train_split3_full_MSFF_DTCL.yaml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
work_dir: ./work_dir/3_w_MSFF_full/
|
| 2 |
+
model_saved_name: ./work_dir/3_w_MSFF_full/
|
| 3 |
+
|
| 4 |
+
# feeder
|
| 5 |
+
feeder: feeders.feeder.Feeder
|
| 6 |
+
train_feeder_args:
|
| 7 |
+
data_path: ./dataset/train_split3.pkl
|
| 8 |
+
debug: False
|
| 9 |
+
random_choose: False
|
| 10 |
+
random_shift: False
|
| 11 |
+
random_move: False
|
| 12 |
+
window_size: -1
|
| 13 |
+
nb_class: 4
|
| 14 |
+
|
| 15 |
+
test_feeder_args:
|
| 16 |
+
data_path: ./dataset/val_split3.pkl
|
| 17 |
+
nb_class: 4
|
| 18 |
+
|
| 19 |
+
# model
|
| 20 |
+
model: model.agcn_Unet.Model
|
| 21 |
+
model_args:
|
| 22 |
+
num_class: 4
|
| 23 |
+
num_person: 1
|
| 24 |
+
num_point: 25 # checked from 25
|
| 25 |
+
graph: graph.ntu_rgb_d.Graph
|
| 26 |
+
graph_args:
|
| 27 |
+
labeling_mode: 'spatial'
|
| 28 |
+
|
| 29 |
+
#optim
|
| 30 |
+
weight_decay: 0.0001
|
| 31 |
+
base_lr: 0.001
|
| 32 |
+
step: [60,80]
|
| 33 |
+
|
| 34 |
+
# training
|
| 35 |
+
device: [0]
|
| 36 |
+
optimizer: 'Adam'
|
| 37 |
+
loss: 'CE'
|
| 38 |
+
batch_size: 8
|
| 39 |
+
test_batch_size: 1
|
| 40 |
+
num_epoch: 101 #101
|
| 41 |
+
nesterov: True
|
| 42 |
+
lambda_mil: 1.0
|
| 43 |
+
weights: /home/newDisk/epoch300_model.pt
|
eval.py
ADDED
|
@@ -0,0 +1,585 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
from __future__ import print_function
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
import inspect
|
| 7 |
+
import os
|
| 8 |
+
import pdb
|
| 9 |
+
import pickle
|
| 10 |
+
import random
|
| 11 |
+
import re
|
| 12 |
+
import shutil
|
| 13 |
+
import time
|
| 14 |
+
from collections import *
|
| 15 |
+
|
| 16 |
+
import ipdb
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
# torch
|
| 20 |
+
import torch
|
| 21 |
+
import torch.backends.cudnn as cudnn
|
| 22 |
+
import torch.nn as nn
|
| 23 |
+
import torch.nn.functional as F
|
| 24 |
+
import torch.optim as optim
|
| 25 |
+
import yaml
|
| 26 |
+
from einops import rearrange, reduce, repeat
|
| 27 |
+
from evaluation.classificationMAP import getClassificationMAP as cmAP
|
| 28 |
+
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 29 |
+
from feeders.tools import collate_with_padding_multi_joint
|
| 30 |
+
from model.losses import cross_entropy_loss, mvl_loss
|
| 31 |
+
from sklearn.metrics import f1_score
|
| 32 |
+
|
| 33 |
+
# Custom
|
| 34 |
+
from tensorboardX import SummaryWriter
|
| 35 |
+
from torch.autograd import Variable
|
| 36 |
+
from torch.optim.lr_scheduler import _LRScheduler
|
| 37 |
+
from tqdm import tqdm
|
| 38 |
+
from utils.logger import Logger
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def init_seed(seed):
    """Seed every RNG source used by this project and pin cuDNN to
    deterministic kernels so experiments are reproducible."""
    seeders = (
        torch.cuda.manual_seed_all,  # all CUDA devices
        torch.manual_seed,           # torch CPU generator
        np.random.seed,              # numpy
        random.seed,                 # python stdlib
    )
    for seed_fn in seeders:
        seed_fn(seed)
    # trade cuDNN autotuning speed for bit-exact reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_parser():
    """Build the command-line parser.

    Parameter priority: command line > config file > default (the yaml config
    is merged into the parser via ``parser.set_defaults`` in ``__main__``, so
    every config key must match an argument ``dest`` here).
    """
    parser = argparse.ArgumentParser(
        description="Spatial Temporal Graph Convolution Network"
    )
    parser.add_argument(
        "--work-dir",
        default="./work_dir/temp",
        help="the work folder for storing results",
    )

    parser.add_argument("-model_saved_name", default="")
    parser.add_argument(
        "--config",
        default="./config/nturgbd-cross-view/test_bone.yaml",
        help="path to the configuration file",
    )

    # processor
    parser.add_argument("--phase", default="test", help="must be train or test")

    # visualize and debug
    parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="the interval for printing messages (#iteration)",
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=2,
        help="the interval for storing models (#iteration)",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=5,
        help="the interval for evaluating models (#iteration)",
    )
    parser.add_argument(
        "--print-log", type=str2bool, default=True, help="print logging or not"
    )
    parser.add_argument(
        "--show-topk",
        type=int,
        default=[1, 5],
        nargs="+",
        help="which Top K accuracy will be shown",
    )

    # feeder
    parser.add_argument(
        "--feeder", default="feeder.feeder", help="data loader will be used"
    )
    parser.add_argument(
        "--num-worker",
        type=int,
        default=32,
        help="the number of worker for data loader",
    )
    # feeder kwargs are normally supplied by the yaml config, not the CLI
    parser.add_argument(
        "--train-feeder-args",
        default=dict(),
        help="the arguments of data loader for training",
    )
    parser.add_argument(
        "--test-feeder-args",
        default=dict(),
        help="the arguments of data loader for test",
    )

    # model
    parser.add_argument("--model", default=None, help="the model will be used")
    # NOTE: type=dict is only usable through the config file, not from the CLI
    parser.add_argument(
        "--model-args", type=dict, default=dict(), help="the arguments of model"
    )
    parser.add_argument(
        "--weights", default=None, help="the weights for network initialization"
    )
    parser.add_argument(
        "--ignore-weights",
        type=str,
        default=[],
        nargs="+",
        help="the name of weights which will be ignored in the initialization",
    )

    # optim
    parser.add_argument(
        "--base-lr", type=float, default=0.01, help="initial learning rate"
    )
    parser.add_argument(
        "--step",
        type=int,
        default=[60, 80],
        nargs="+",
        help="the epoch where optimizer reduce the learning rate",
    )

    # training
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        nargs="+",
        help="the indexes of GPUs for training or testing",
    )
    parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
    parser.add_argument(
        "--nesterov", type=str2bool, default=False, help="use nesterov or not"
    )
    parser.add_argument(
        "--batch-size", type=int, default=256, help="training batch size"
    )
    parser.add_argument(
        "--test-batch-size", type=int, default=256, help="test batch size"
    )
    parser.add_argument(
        "--start-epoch", type=int, default=0, help="start training from which epoch"
    )
    parser.add_argument(
        "--num-epoch", type=int, default=80, help="stop training in which epoch"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
    )
    # loss
    parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
    parser.add_argument(
        "--label_count_path",
        default=None,
        type=str,
        help="Path to label counts (used in loss weighting)",
    )
    # BUGFIX: was "---beta" (three dashes), which exposed the flag as
    # ``---beta`` on the CLI; the dest stays "beta", so config files and all
    # downstream reads of arg.beta are unaffected.
    parser.add_argument(
        "--beta",
        type=float,
        default=0.9999,
        help="Hyperparameter for Class balanced loss",
    )
    parser.add_argument(
        "--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
    )

    parser.add_argument("--only_train_part", default=False)
    parser.add_argument("--only_train_epoch", default=0)
    parser.add_argument("--warm_up_epoch", default=0)

    # BUGFIX: added type=float so a command-line override is parsed as a
    # number instead of a string (the default and the yaml value are floats).
    parser.add_argument(
        "--lambda-mil",
        type=float,
        default=1.0,
        help="balancing hyper-parameter of mil branch",
    )

    parser.add_argument(
        "--class-threshold",
        type=float,
        default=0.1,
        help="class threshold for rejection",
    )
    parser.add_argument(
        "--start-threshold",
        type=float,
        default=0.03,
        help="start threshold for action localization",
    )
    parser.add_argument(
        "--end-threshold",
        type=float,
        default=0.055,
        help="end threshold for action localization",
    )
    parser.add_argument(
        "--threshold-interval",
        type=float,
        default=0.005,
        help="threshold interval for action localization",
    )
    return parser
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
class Processor:
    """
    Processor for Skeleton-based Action Recognition: builds the model,
    data loaders and optimizer from the merged argparse/yaml namespace and
    runs classification/detection mAP evaluation.
    """

    def __init__(self, arg):
        # `arg` is the namespace produced in __main__ (CLI merged with yaml).
        self.arg = arg
        self.save_arg()  # snapshot the effective config into work_dir/config.yaml
        if arg.phase == "train":
            if not arg.train_feeder_args["debug"]:
                if os.path.isdir(arg.model_saved_name):
                    print("log_dir: ", arg.model_saved_name, "already exist")
                    # answer = input('delete it? y/n:')
                    answer = "y"  # hard-coded: always wipe a stale log dir
                    if answer == "y":
                        print("Deleting dir...")
                        shutil.rmtree(arg.model_saved_name)
                        print("Dir removed: ", arg.model_saved_name)
                        # input('Refresh the website of tensorboard by pressing any keys')
                    else:
                        print("Dir not removed: ", arg.model_saved_name)
                self.train_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "train"), "train"
                )
                self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "val"), "val"
                )
            else:
                # debug mode: one writer shared by train and val
                self.train_writer = self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "test"), "test"
                )
        # NOTE(review): in phase "test" no SummaryWriter is created; eval()
        # only touches self.val_writer when phase == "train", so this holds.
        self.global_step = 0
        self.load_model()
        self.load_optimizer()
        self.load_data()
        self.lr = self.arg.base_lr
        self.best_acc = 0
        self.best_per_class_acc = 0
        # multi-label video-level objective; model outputs are probabilities
        self.loss_nce = torch.nn.BCELoss()

        self.my_logger = Logger(
            os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
        )
        # columns: step, classification mAP, detection mAP at IoU 0.1 .. 0.5
        self.my_logger.set_names(["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)])

    def load_data(self):
        """Build the train (train phase only) and test DataLoaders from the
        Feeder class named in the config."""
        Feeder = import_class(self.arg.feeder)
        self.data_loader = dict()
        if self.arg.phase == "train":
            self.data_loader["train"] = torch.utils.data.DataLoader(
                dataset=Feeder(**self.arg.train_feeder_args),
                batch_size=self.arg.batch_size,
                shuffle=True,
                num_workers=self.arg.num_worker,
                drop_last=True,
                # pads variable-length skeleton sequences into one batch
                collate_fn=collate_with_padding_multi_joint,
            )
        self.data_loader["test"] = torch.utils.data.DataLoader(
            dataset=Feeder(**self.arg.test_feeder_args),
            batch_size=self.arg.test_batch_size,
            shuffle=False,
            num_workers=self.arg.num_worker,
            drop_last=False,
            collate_fn=collate_with_padding_multi_joint,
        )

    def load_model(self):
        """Instantiate the model on the output device and optionally restore a
        checkpoint (.pkl pickle or torch file), honouring --ignore-weights."""
        output_device = (
            self.arg.device[0] if type(self.arg.device) is list else self.arg.device
        )
        self.output_device = output_device
        Model = import_class(self.arg.model)
        # keep a copy of the model source next to the results for provenance
        shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
        # print(Model)
        self.model = Model(**self.arg.model_args).cuda(output_device)
        # print(self.model)
        # NOTE(review): reads the module-level `arg` (assigned in __main__),
        # not self.arg — works here but is fragile; confirm intended.
        self.loss_type = arg.loss

        if self.arg.weights:
            # self.global_step = int(arg.weights[:-3].split("-")[-1])
            self.print_log("Load weights from {}.".format(self.arg.weights))
            if ".pkl" in self.arg.weights:
                with open(self.arg.weights, "r") as f:
                    weights = pickle.load(f)
            else:
                weights = torch.load(self.arg.weights)

            # strip a possible DataParallel "module." prefix, move to GPU
            weights = OrderedDict(
                [
                    [k.split("module.")[-1], v.cuda(output_device)]
                    for k, v in weights.items()
                ]
            )

            keys = list(weights.keys())
            for w in self.arg.ignore_weights:
                # substring match: drop every checkpoint entry containing w
                for key in keys:
                    if w in key:
                        if weights.pop(key, None) is not None:
                            self.print_log(
                                "Sucessfully Remove Weights: {}.".format(key)
                            )
                        else:
                            self.print_log("Can Not Remove Weights: {}.".format(key))

            try:
                self.model.load_state_dict(weights)
            except:
                # partial load: report the missing keys, then merge what exists
                state = self.model.state_dict()
                diff = list(set(state.keys()).difference(set(weights.keys())))
                print("Can not find these weights:")
                for d in diff:
                    print(" " + d)
                state.update(weights)
                self.model.load_state_dict(state)

        if type(self.arg.device) is list:
            if len(self.arg.device) > 1:
                self.model = nn.DataParallel(
                    self.model, device_ids=self.arg.device, output_device=output_device
                )

    def load_optimizer(self):
        """Create the optimizer named by --optimizer (SGD or Adam only)."""
        if self.arg.optimizer == "SGD":
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=self.arg.base_lr,
                momentum=0.9,
                nesterov=self.arg.nesterov,
                weight_decay=self.arg.weight_decay,
            )
        elif self.arg.optimizer == "Adam":
            self.optimizer = optim.Adam(
                self.model.parameters(),
                lr=self.arg.base_lr,
                weight_decay=self.arg.weight_decay,
            )
        else:
            raise ValueError()

    def save_arg(self):
        """Dump the full argument namespace to <work_dir>/config.yaml."""
        arg_dict = vars(self.arg)
        if not os.path.exists(self.arg.work_dir):
            os.makedirs(self.arg.work_dir)
        with open("{}/config.yaml".format(self.arg.work_dir), "w") as f:
            yaml.dump(arg_dict, f)

    def adjust_learning_rate(self, epoch):
        """Step-decay schedule with optional linear warm-up; writes the new lr
        into every optimizer param group and returns it."""
        if self.arg.optimizer == "SGD" or self.arg.optimizer == "Adam":
            if epoch < self.arg.warm_up_epoch:
                # linear warm-up from base_lr/warm_up_epoch up to base_lr
                lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch
            else:
                # decay by 0.1 at each milestone in self.arg.step
                lr = self.arg.base_lr * (
                    0.1 ** np.sum(epoch >= np.array(self.arg.step))
                )
            for param_group in self.optimizer.param_groups:
                param_group["lr"] = lr

            return lr
        else:
            raise ValueError()

    def print_time(self):
        """Log the wall-clock time."""
        localtime = time.asctime(time.localtime(time.time()))
        self.print_log("Local current time : " + localtime)

    def print_log(self, str, print_time=True):
        """Print a message (optionally timestamped) and append it to
        <work_dir>/print_log.txt when --print-log is enabled."""
        if print_time:
            localtime = time.asctime(time.localtime(time.time()))
            str = "[ " + localtime + " ] " + str
        print(str)
        if self.arg.print_log:
            with open("{}/print_log.txt".format(self.arg.work_dir), "a") as f:
                print(str, file=f)

    def record_time(self):
        # start/reset the split timer used by split_time()
        self.cur_time = time.time()
        return self.cur_time

    def split_time(self):
        # seconds elapsed since the last record_time()/split_time() call
        split_time = time.time() - self.cur_time
        self.record_time()
        return split_time

    @torch.no_grad()
    def eval(
        self,
        epoch,
        wb_dict,
        loader_name=["test"],  # NOTE(review): mutable default, shared across calls
    ):
        """Run one evaluation pass over the named loaders.

        Accumulates per-video and per-frame predictions, then reports
        classification mAP and detection mAP over the IoU thresholds set in
        ``self.arg``. Returns ``wb_dict`` updated with "val loss"/"val acc".
        """
        self.model.eval()
        self.print_log("Eval epoch: {}".format(epoch + 1))

        vid_preds = []  # video-level class probabilities
        frm_preds = []  # per-frame softmax scores, trimmed to true length
        vid_lens = []   # unpadded sequence lengths
        labels = []     # ground-truth multi-hot labels

        for ln in loader_name:
            loss_value = []
            step = 0
            process = tqdm(self.data_loader[ln])

            for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
                process
            ):
                data = data.float().cuda(self.output_device)
                label = label.cuda(self.output_device)
                mask = mask.cuda(self.output_device)

                # append an extra always-on column (background/ambient class)
                ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

                # forward: two MIL heads and two per-frame score streams
                mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data, mask)

                cls_mil_loss = self.loss_nce(
                    mil_pred, ab_labels.float()
                ) + self.loss_nce(mil_pred_2, ab_labels.float())

                # consistency loss between the two score streams
                loss_co = mvl_loss(frm_scrs, frm_scrs_2, rate=0.2, weight=0.5)

                loss = cls_mil_loss * self.arg.lambda_mil + loss_co

                loss_value.append(loss.data.item())

                for i in range(data.size(0)):
                    frm_scr = frm_scrs[i]
                    vid_pred = mil_pred[i]

                    label_ = label[i].cpu().numpy()
                    mask_ = mask[i].cpu().numpy()
                    vid_len = mask_.sum()  # number of valid (unpadded) frames

                    frm_pred = F.softmax(frm_scr, -1).cpu().numpy()[:vid_len]
                    vid_pred = vid_pred.cpu().numpy()

                    vid_preds.append(vid_pred)
                    frm_preds.append(frm_pred)
                    vid_lens.append(vid_len)
                    labels.append(label_)

                step += 1

        vid_preds = np.array(vid_preds)
        frm_preds = np.array(frm_preds)
        vid_lens = np.array(vid_lens)
        labels = np.array(labels)

        cmap = cmAP(vid_preds, labels)

        score = cmap
        loss = np.mean(loss_value)

        # detection mAP over the thresholds configured in self.arg
        dmap, iou = dsmAP(
            vid_preds,
            frm_preds,
            vid_lens,
            self.arg.test_feeder_args["data_path"],
            self.arg,
            multi=True,
        )

        print("Classification map %f" % cmap)

        for item in list(zip(iou, dmap)):
            print("Detection map @ %f = %f" % (item[0], item[1]))

        self.my_logger.append([epoch + 1, cmap] + dmap)

        wb_dict["val loss"] = loss
        wb_dict["val acc"] = score

        if score > self.best_acc:
            self.best_acc = score

        print("Acc score: ", score, " model: ", self.arg.model_saved_name)
        if self.arg.phase == "train":
            self.val_writer.add_scalar("loss", loss, self.global_step)
            self.val_writer.add_scalar("acc", score, self.global_step)

        # NOTE: uses `ln`/`loss_value` leaked from the loop above, i.e. the
        # last loader evaluated
        self.print_log(
            "\tMean {} loss of {} batches: {}.".format(
                ln, len(self.data_loader[ln]), np.mean(loss_value)
            )
        )
        self.print_log("\tAcc score: {:.3f}%".format(score))

        return wb_dict

    def start(self):
        """Entry point: in phase "test", validate --weights and run eval()."""
        wb_dict = {}

        if self.arg.phase == "test":
            if not self.arg.test_feeder_args["debug"]:
                wf = self.arg.model_saved_name + "_wrong.txt"
                rf = self.arg.model_saved_name + "_right.txt"
            else:
                wf = rf = None
            if self.arg.weights is None:
                raise ValueError("Please appoint --weights.")
            self.arg.print_log = False
            self.print_log("Model: {}.".format(self.arg.model))
            self.print_log("Weights: {}.".format(self.arg.weights))

            wb_dict = self.eval(
                epoch=0,
                wb_dict=wb_dict,
                loader_name=["test"],
                # wrong_file=wf,
                # result_file=rf,
            )
            print("Inference metrics: ", wb_dict)
            self.print_log("Done.\n")
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def str2bool(v):
    """Parse a truthy/falsey command-line string into a bool (argparse type)."""
    normalized = v.lower()
    if normalized in ("yes", "true", "t", "y", "1"):
        return True
    if normalized in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def import_class(name):
    """Resolve a dotted path string (e.g. "feeders.feeder.Feeder") to the
    object it names, importing the root package as a side effect."""
    module_path, *attrs = name.split(".")
    obj = __import__(module_path)
    for attr in attrs:
        obj = getattr(obj, attr)
    return obj
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
if __name__ == "__main__":
|
| 566 |
+
parser = get_parser()
|
| 567 |
+
|
| 568 |
+
# load arg form config file
|
| 569 |
+
p = parser.parse_args()
|
| 570 |
+
if p.config is not None:
|
| 571 |
+
with open(p.config, "r") as f:
|
| 572 |
+
default_arg = yaml.safe_load(f)
|
| 573 |
+
key = vars(p).keys()
|
| 574 |
+
for k in default_arg.keys():
|
| 575 |
+
if k not in key:
|
| 576 |
+
print("WRONG ARG: {}".format(k))
|
| 577 |
+
assert k in key
|
| 578 |
+
parser.set_defaults(**default_arg)
|
| 579 |
+
|
| 580 |
+
arg = parser.parse_args()
|
| 581 |
+
print("BABEL Action Recognition")
|
| 582 |
+
print("Config: ", arg)
|
| 583 |
+
init_seed(arg.seed)
|
| 584 |
+
processor = Processor(arg)
|
| 585 |
+
processor.start()
|
evaluation/classificationMAP.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def getAP(conf, labels):
    """Average precision of one class.

    conf   : 1-D array of per-sample confidence scores.
    labels : 1-D binary array (1 = positive) of the same length.
    Returns AP in [0, 1]; by the original convention a class with no
    positives scores 1.
    """
    assert len(conf) == len(labels)
    npos = np.sum(labels)
    # BUGFIX: guard first — the original computed tp/npos before the check,
    # emitting a divide-by-zero RuntimeWarning (and an unused `rec`) when the
    # class has no positives. Return value is unchanged.
    if npos == 0:
        return 1

    sortind = np.argsort(-conf)  # descending by confidence
    tp = np.cumsum(labels[sortind] == 1).astype("float32")
    fp = np.cumsum(labels[sortind] != 1).astype("float32")
    prec = tp / (fp + tp)  # precision at each rank

    # sum precision at the rank of each true positive, normalized by #positives
    tmp = (labels[sortind] == 1).astype("float32")
    return np.sum(tmp * prec) / npos
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def getClassificationMAP(confidence, labels):
    """Mean average precision over classes, as a percentage.

    Both inputs are arrays of shape (n_samples, n_labels).
    """
    n_labels = np.shape(labels)[1]
    per_class_ap = [
        getAP(confidence[:, cls], labels[:, cls]) for cls in range(n_labels)
    ]
    return 100 * sum(per_class_ap) / len(per_class_ap)
|
evaluation/detectionMAP.py
ADDED
|
@@ -0,0 +1,516 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
from collections import Counter
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def str2ind(categoryname, classlist):
    """Return the index of the first entry of classlist equal to categoryname
    (raises IndexError when absent, like the original list-index form)."""
    for position, candidate in enumerate(classlist):
        if candidate == categoryname:
            return position
    raise IndexError("list index out of range")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def encode_mask_to_rle(mask):
    """
    mask: numpy array binary mask
    1 - mask
    0 - background
    Returns the run-length encoding as alternating (1-indexed start, length)
    values in a flat array.
    """
    # pad with zeros on both ends so every run has a detectable start and end
    padded = np.concatenate([[0], mask.flatten(), [0]])
    # 1-indexed positions where the value changes: run starts and run ends
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # turn every (start, end) pair into (start, length)
    change_points[1::2] -= change_points[::2]
    return change_points
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def filter_segments(segment_predict, videonames, ambilist, factor):
    """Drop predicted segments that overlap (IoU > 0) any ambiguous ground
    truth interval of the same video; returns the surviving rows as an array.

    segment_predict rows: [video index, start frame, end frame, ...].
    ambilist entries: [video name, _, start seconds, end seconds]; `factor`
    converts seconds to frames.
    """
    n_segments = np.shape(segment_predict)[0]
    discard = np.zeros(n_segments)
    for idx in range(n_segments):
        video = videonames[int(segment_predict[idx, 0])]
        for entry in ambilist:
            if entry[0] != video:
                continue
            gt_frames = set(
                range(
                    int(round(float(entry[2]) * factor)),
                    int(round(float(entry[3]) * factor)),
                )
            )
            pred_frames = set(
                range(int(segment_predict[idx][1]), int(segment_predict[idx][2]))
            )
            overlap = float(len(gt_frames.intersection(pred_frames))) / float(
                len(gt_frames.union(pred_frames))
            )
            if overlap > 0:
                discard[idx] = 1
    survivors = [
        segment_predict[idx, :] for idx in range(n_segments) if discard[idx] == 0
    ]
    return np.array(survivors)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def getActLoc(
    vid_preds, frm_preds, vid_lens, act_thresh_cas, annotation_path, args, multi=False
):
    """Generate per-class action-localization proposals from frame-level scores.

    vid_preds: per-video class scores; frm_preds: per-video (T, C) frame scores.
    act_thresh_cas: iterable of activation thresholds used to binarize the CAS.
    annotation_path: pickle with keys "L" (per-frame labels), "Y", "sid".
    multi: when True, treat each video's label track as multi-class and build
    one segment list per class present (the class id args.model_args["num_class"]
    is discarded — presumably the background class; TODO confirm).

    Returns a list with one entry per class; each entry is an (N, 4) array of
    [video_idx, start, end, outer-inner contrast score] proposals (or an empty
    list when the class produced none).
    """
    # Annotations are pickled; fall back to binary/latin1 for python2-era pickles.
    try:
        with open(annotation_path) as f:
            data = pickle.load(f)
    except:
        # for pickle file from python2
        with open(annotation_path, "rb") as f:
            data = pickle.load(f, encoding="latin1")

    if multi:
        # Build ground-truth segments separately for every class present
        # in the frame-label track, via run-length encoding of (gt == c).
        gtsegments = []
        gtlabels = []
        for idx in range(len(data["L"])):
            gt = data["L"][idx]
            gt_ = set(gt)
            gt_.discard(args.model_args["num_class"])
            gts = []
            gtl = []
            for c in list(gt_):
                gt_encoded = encode_mask_to_rle(gt == c)
                # RLE starts are 1-based: convert (start, length) to
                # 0-based inclusive [start, end] pairs.
                gts.extend(
                    [
                        [x - 1, x + y - 2]
                        for x, y in zip(gt_encoded[::2], gt_encoded[1::2])
                    ]
                )
                gtl.extend([c for item in gt_encoded[::2]])
            gtsegments.append(gts)
            gtlabels.append(gtl)
    else:
        # Single-label case: one RLE over the whole binary label track,
        # with the video-level label data["Y"][idx] attached to each run.
        gtsegments = []
        gtlabels = []
        for idx in range(len(data["L"])):
            gt = data["L"][idx]
            gt_encoded = encode_mask_to_rle(gt)
            gtsegments.append(
                [[x - 1, x + y - 2] for x, y in zip(gt_encoded[::2], gt_encoded[1::2])]
            )
            gtlabels.append([data["Y"][idx] for item in gt_encoded[::2]])

    videoname = np.array(data["sid"])

    # keep ground truth and predictions for instances with temporal annotations
    gtl, vn, vp, fp, vl = [], [], [], [], []
    for i, s in enumerate(gtsegments):
        if len(s):
            gtl.append(gtlabels[i])
            vn.append(videoname[i])
            vp.append(vid_preds[i])
            fp.append(frm_preds[i])
            vl.append(vid_lens[i])
        else:
            # NOTE(review): videos without temporal annotations are only
            # printed and silently dropped from evaluation.
            print(i)
    gtlabels = gtl
    videoname = vn

    # which categories have temporal labels ?
    templabelidx = sorted(list(set([l for gtl in gtlabels for l in gtl])))

    dataset_segment_predict = []
    class_threshold = args.class_threshold
    for c in range(frm_preds[0].shape[1]):
        c_temp = []
        # Get list of all predictions for class c
        for i in range(len(fp)):
            vid_cls_score = vp[i][c]
            vid_cas = fp[i][:, c]
            vid_cls_proposal = []
            # Video-level class gating is intentionally disabled here:
            # if vid_cls_score < class_threshold:
            #     continue
            for t in range(len(act_thresh_cas)):
                thres = act_thresh_cas[t]
                # Zero-pad so rising/falling edges at the sequence ends count.
                vid_pred = np.concatenate(
                    [np.zeros(1), (vid_cas > thres).astype("float32"), np.zeros(1)],
                    axis=0,
                )
                vid_pred_diff = [
                    vid_pred[idt] - vid_pred[idt - 1] for idt in range(1, len(vid_pred))
                ]
                # +1 edges are proposal starts, -1 edges are proposal ends.
                s = [idk for idk, item in enumerate(vid_pred_diff) if item == 1]
                e = [idk for idk, item in enumerate(vid_pred_diff) if item == -1]
                for j in range(len(s)):
                    len_proposal = e[j] - s[j]
                    # Keep only proposals at least 3 frames long.
                    if len_proposal >= 3:
                        inner_score = np.mean(vid_cas[s[j] : e[j] + 1])
                        # Outer context: 25% of the proposal length on each side.
                        outer_s = max(0, int(s[j] - 0.25 * len_proposal))
                        outer_e = min(
                            int(vid_cas.shape[0] - 1),
                            int(e[j] + 0.25 * len_proposal + 1),
                        )
                        outer_temp_list = list(range(outer_s, int(s[j]))) + list(
                            range(int(e[j] + 1), outer_e)
                        )
                        if len(outer_temp_list) == 0:
                            outer_score = 0
                        else:
                            outer_score = np.mean(vid_cas[outer_temp_list])
                        # Inner-minus-outer contrast; 0.6 is an empirical weight.
                        c_score = inner_score - 0.6 * outer_score
                        vid_cls_proposal.append([i, s[j], e[j] + 1, c_score])
            # NMS across thresholds within one video/class at IoU 0.2.
            pick_idx = NonMaximumSuppression(np.array(vid_cls_proposal), 0.2)
            nms_vid_cls_proposal = [vid_cls_proposal[k] for k in pick_idx]
            c_temp += nms_vid_cls_proposal
        if len(c_temp) > 0:
            c_temp = np.array(c_temp)
        dataset_segment_predict.append(c_temp)
    """
    for i, pred in enumerate(dataset_segment_predict):
        print (f"#{i} class {c} has {len(pred)} predictions")
    """
    return dataset_segment_predict
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def IntergrateSegs(rgb_segs, flow_segs, th, args):
    """Fuse per-class RGB and flow proposals video by video with NMS.

    rgb_segs / flow_segs: lists (one entry per class) of (N, 4) proposal
    arrays [video_idx, start, end, score], as produced by getActLoc.
    th: NMS overlap threshold used when both streams propose on a video.

    Returns a list (one entry per class) of fused (N, 4) proposal arrays.
    """
    NUM_CLASS = args.class_num
    # NOTE(review): hard-coded video count — presumably the THUMOS'14-style
    # test-set size for this dataset; confirm it matches the loader.
    NUM_VID = 212
    segs = []
    for i in range(NUM_CLASS):
        class_seg = []
        rgb_seg = rgb_segs[i]
        flow_seg = flow_segs[i]
        # Column 0 is the video index each proposal belongs to.
        rgb_seg_ind = np.array(rgb_seg)[:, 0]
        flow_seg_ind = np.array(flow_seg)[:, 0]
        for j in range(NUM_VID):
            rgb_find = np.where(rgb_seg_ind == j)
            flow_find = np.where(flow_seg_ind == j)
            if len(rgb_find[0]) == 0 and len(flow_find[0]) == 0:
                continue
            elif len(rgb_find[0]) != 0 and len(flow_find[0]) != 0:
                # Both streams fired: pool their proposals and suppress overlaps.
                rgb_vid_seg = rgb_seg[rgb_find[0]]
                flow_vid_seg = flow_seg[flow_find[0]]
                fuse_seg = np.concatenate([rgb_vid_seg, flow_vid_seg], axis=0)
                pick_idx = NonMaximumSuppression(fuse_seg, th)
                fuse_segs = fuse_seg[pick_idx]
                class_seg.append(fuse_segs)
            elif len(rgb_find[0]) != 0 and len(flow_find[0]) == 0:
                vid_seg = rgb_seg[rgb_find[0]]
                class_seg.append(vid_seg)
            elif len(rgb_find[0]) == 0 and len(flow_find[0]) != 0:
                vid_seg = flow_seg[flow_find[0]]
                class_seg.append(vid_seg)
        # NOTE(review): if a class has no proposals in any video, class_seg is
        # empty and np.concatenate raises — upstream seems to guarantee at
        # least one proposal per class; verify.
        class_seg = np.concatenate(class_seg, axis=0)
        segs.append(class_seg)
    return segs
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def NonMaximumSuppression(segs, overlapThresh):
    """Greedy non-maximum suppression over 1-D temporal segments.

    segs: (N, 4) array of [video_idx, start, end, score] rows.
    overlapThresh: a remaining segment is dropped when its overlap with the
    picked one, normalised by the remaining segment's own length, exceeds this.

    Returns the indices of the kept segments, best score first.
    """
    if len(segs) == 0:
        return []
    # Work in floats so the overlap-ratio division below is exact.
    if segs.dtype.kind == "i":
        segs = segs.astype("float")

    starts = segs[:, 1]
    ends = segs[:, 2]
    scores = segs[:, 3]
    lengths = ends - starts + 1

    pick = []
    order = np.argsort(scores)  # ascending; best candidate sits at the back
    while len(order) > 0:
        best = order[-1]
        pick.append(best)
        rest = order[:-1]
        # Overlap of every remaining segment with the picked one.
        inter = np.maximum(
            0,
            np.minimum(ends[best], ends[rest])
            - np.maximum(starts[best], starts[rest])
            + 1,
        )
        ratio = inter / lengths[rest]
        # Keep only segments whose overlap ratio stays within the threshold.
        order = rest[ratio <= overlapThresh]
    return pick
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def getLocMAP(seg_preds, th, annotation_path, args, multi=False, factor=1.0):
    """Compute localization mAP at a single IoU threshold *th*.

    seg_preds: list of per-class (N, 4) proposal arrays
    [video_idx, start, end, score].
    annotation_path: pickle with per-frame labels under key "L".
    factor: seconds/frames scale applied to ground-truth boundaries.

    Returns mean AP (in percent) over the hard-coded class set [0, 1, 2, 3].

    NOTE(review): only the multi=True path builds gtsegments/gtlabels; calling
    with multi=False raises NameError below (the single-label branch was
    commented out). Confirm all callers pass multi=True.
    """
    # Annotations are pickled; fall back to binary/latin1 for python2-era pickles.
    try:
        with open(annotation_path) as f:
            data = pickle.load(f)
    except:
        # for pickle file from python2
        with open(annotation_path, "rb") as f:
            data = pickle.load(f, encoding="latin1")

    if multi:
        gtsegments = []
        gtlabels = []
        for idx in range(len(data["L"])):
            gt = data["L"][idx]
            gt_ = set(gt)
            # Hard-coded background class id (was args.model_args["num_classes"]).
            # NOTE(review): assumes exactly 4 foreground classes — confirm.
            gt_.discard(4)
            gts = []
            gtl = []
            for c in list(gt_):
                gt_encoded = encode_mask_to_rle(gt == c)
                # RLE starts are 1-based: convert (start, length) to
                # 0-based inclusive [start, end] pairs.
                gts.extend(
                    [
                        [x - 1, x + y - 2]
                        for x, y in zip(gt_encoded[::2], gt_encoded[1::2])
                    ]
                )
                gtl.extend([c for item in gt_encoded[::2]])
            gtsegments.append(gts)
            gtlabels.append(gtl)

    # Hard-coded set of classes with temporal labels (was derived from gtlabels).
    templabelidx = [0,1,2,3]
    ap = []
    for c in templabelidx:
        segment_predict = seg_preds[c]
        # Sort the list of predictions for class c based on score
        if len(segment_predict) == 0:
            ap.append(0.0)
            continue
        segment_predict = segment_predict[np.argsort(-segment_predict[:, 3])]

        # Create gt list: [video_idx, start, end] for every GT segment of class c.
        segment_gt = [
            [i, gtsegments[i][j][0], gtsegments[i][j][1]]
            for i in range(len(gtsegments))
            for j in range(len(gtsegments[i]))
            if gtlabels[i][j] == c
        ]
        gtpos = len(segment_gt)

        # Compare predictions and gt: greedy one-to-one matching in score order.
        tp, fp = [], []
        for i in range(len(segment_predict)):
            matched = False
            best_iou = 0
            for j in range(len(segment_gt)):
                if segment_predict[i][0] == segment_gt[j][0]:
                    gt = range(
                        int(round(segment_gt[j][1] * factor)),
                        int(round(segment_gt[j][2] * factor)),
                    )
                    p = range(int(segment_predict[i][1]), int(segment_predict[i][2]))
                    union_set = set(gt).union(set(p))
                    if len(union_set) == 0:
                        IoU = 0.0  # both ranges empty: treat as no overlap
                    else:
                        IoU = float(len(set(gt).intersection(set(p)))) / float(len(union_set))
                    if IoU >= th:
                        matched = True
                        # best_j is always assigned before use: IoU >= th > 0
                        # implies IoU > the initial best_iou of 0.
                        if IoU > best_iou:
                            best_iou = IoU
                            best_j = j
            if matched:
                # Consume the matched GT so it cannot match a later prediction.
                del segment_gt[best_j]
            tp.append(float(matched))
            fp.append(1.0 - float(matched))
        tp_c = np.cumsum(tp)
        fp_c = np.cumsum(fp)
        if sum(tp) == 0:
            prc = 0.0
        else:
            cur_prec = tp_c / (fp_c + tp_c)
            cur_rec = tp_c / gtpos
            prc = _ap_from_pr(cur_prec, cur_rec)
        ap.append(prc)

    # Per-class AP in percent, space-separated.
    print(f" ".join([f"{item*100:.2f}" for item in ap]))
    if ap:
        return 100 * np.mean(ap)
    else:
        return 0
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
# Inspired by Pascal VOC evaluation tool.
|
| 362 |
+
def _ap_from_pr(prec, rec):
|
| 363 |
+
mprec = np.hstack([[0], prec, [0]])
|
| 364 |
+
mrec = np.hstack([[0], rec, [1]])
|
| 365 |
+
|
| 366 |
+
for i in range(len(mprec) - 1)[::-1]:
|
| 367 |
+
mprec[i] = max(mprec[i], mprec[i + 1])
|
| 368 |
+
|
| 369 |
+
idx = np.where(mrec[1::] != mrec[0:-1])[0] + 1
|
| 370 |
+
ap = np.sum((mrec[idx] - mrec[idx - 1]) * mprec[idx])
|
| 371 |
+
|
| 372 |
+
return ap
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def compute_iou(dur1, dur2):
    """Temporal IoU of two [start, end] intervals; 0 when they do not overlap."""
    overlap_start = max(dur1[0], dur2[0])
    overlap_end = min(dur1[1], dur2[1])

    # No positive-length intersection means zero IoU.
    if overlap_start >= overlap_end:
        return 0

    intersection = overlap_end - overlap_start
    span = max(dur1[1], dur2[1]) - min(dur1[0], dur2[0])
    return intersection / span
|
| 387 |
+
|
| 388 |
+
def getActLoc1(
    frm_preds, act_thresh_cas=np.arange(0.03, 0.055, 0.005)
):
    """Generate per-class action proposals from frame-level class scores.

    frm_preds: sequence of per-video (T, C) frame-score arrays.
    act_thresh_cas: activation thresholds used to binarize each class CAS.

    Returns a list with one entry per class; each entry is an (N, 4) array of
    [video_idx, start, end, inner-outer contrast score] proposals (or an empty
    list when the class produced none).

    Fixes vs. original: removed the pointless element-by-element copy of
    frm_preds into a temporary list and the hand-rolled first-difference loop
    (replaced with np.diff); behavior is unchanged.
    """
    dataset_segment_predict = []
    num_classes = frm_preds[0].shape[1]
    for c in range(num_classes):
        c_temp = []
        # Get list of all predictions for class c
        for i, frame_scores in enumerate(frm_preds):
            vid_cas = frame_scores[:, c]
            vid_cls_proposal = []
            for thres in act_thresh_cas:
                # Zero-pad so edges at the sequence boundaries are detected.
                vid_pred = np.concatenate(
                    [np.zeros(1), (vid_cas > thres).astype("float32"), np.zeros(1)],
                    axis=0,
                )
                edges = np.diff(vid_pred)
                proposal_starts = np.where(edges == 1)[0]
                proposal_ends = np.where(edges == -1)[0]
                for s_t, e_t in zip(proposal_starts, proposal_ends):
                    len_proposal = e_t - s_t
                    # Keep only proposals at least 3 frames long.
                    if len_proposal < 3:
                        continue
                    inner_score = np.mean(vid_cas[s_t : e_t + 1])
                    # Outer context: 25% of the proposal length on each side.
                    outer_s = max(0, int(s_t - 0.25 * len_proposal))
                    outer_e = min(
                        int(vid_cas.shape[0] - 1),
                        int(e_t + 0.25 * len_proposal + 1),
                    )
                    outer_idx = list(range(outer_s, int(s_t))) + list(
                        range(int(e_t + 1), outer_e)
                    )
                    outer_score = np.mean(vid_cas[outer_idx]) if outer_idx else 0
                    # Inner-minus-outer contrast; 0.6 is an empirical weight.
                    c_score = inner_score - 0.6 * outer_score
                    vid_cls_proposal.append([i, s_t, e_t + 1, c_score])
            # NMS across thresholds within one video/class at IoU 0.2.
            pick_idx = NonMaximumSuppression(np.array(vid_cls_proposal), 0.2)
            c_temp += [vid_cls_proposal[k] for k in pick_idx]
        if len(c_temp) > 0:
            c_temp = np.array(c_temp)
        dataset_segment_predict.append(c_temp)
    return dataset_segment_predict
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def getSingleStreamDetectionMAP(
    vid_preds, frm_preds, vid_lens, annotation_path, args, multi=False, factor=1.0
):
    """Detection mAP of one prediction stream at IoU 0.1..0.5.

    Proposals are generated once from the frame predictions, then scored with
    getLocMAP at each IoU threshold. Returns (dmap_list, iou_list).
    """
    thresholds = np.arange(
        args.start_threshold, args.end_threshold, args.threshold_interval
    )
    proposals = getActLoc1(frm_preds, thresholds)

    iou_list = [0.1, 0.2, 0.3, 0.4, 0.5]
    dmap_list = []
    for iou in iou_list:
        print("Testing for IoU %f" % iou)
        score = getLocMAP(proposals, iou, annotation_path, args, multi=multi, factor=factor)
        dmap_list.append(score)
    return dmap_list, iou_list
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
def getTwoStreamDetectionMAP(
    rgb_vid_preds,
    flow_vid_preds,
    rgb_frm_preds,
    flow_frm_preds,
    vid_lens,
    annotation_path,
    args,
):
    """Two-stream detection mAP: fuse RGB and flow proposals, score at IoU 0.1..0.7.

    Returns (dmap_list, iou_list).
    """
    iou_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
    dmap_list = []
    # NOTE(review): RGB frame scores and thresholds are both scaled by 0.1 —
    # presumably to down-weight the RGB stream relative to flow; the scale
    # cancels in the thresholding but changes proposal contrast scores. Confirm.
    rgb_seg = getActLoc(
        rgb_vid_preds,
        rgb_frm_preds * 0.1,
        vid_lens,
        np.arange(args.start_threshold, args.end_threshold, args.threshold_interval)
        * 0.1,
        annotation_path,
        args,
    )
    flow_seg = getActLoc(
        flow_vid_preds,
        flow_frm_preds,
        vid_lens,
        np.arange(args.start_threshold, args.end_threshold, args.threshold_interval),
        annotation_path,
        args,
    )
    # Fuse the two proposal sets per class/video with NMS at overlap 0.9.
    seg = IntergrateSegs(rgb_seg, flow_seg, 0.9, args)
    for iou in iou_list:
        print("Testing for IoU %f" % iou)
        dmap_list.append(getLocMAP(seg, iou, annotation_path, args))

    return dmap_list, iou_list
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
def getSingleStreamDetectionMAP_gcn(
    seg, annotation_path, args, multi=False, factor=1.0
):
    """Detection mAP for pre-computed GCN proposals at IoU 0.1..0.5.

    seg: list of per-class ndarrays of shape (#pred, 4); each row expands as
    [video_index, start, end, score].

    Returns (dmap_list, iou_list).

    Fix vs. original: the first assignment `iou_list = [0.3, 0.5]` was dead
    code, immediately overwritten — removed.
    """
    iou_list = [0.1, 0.2, 0.3, 0.4, 0.5]
    dmap_list = []

    for iou in iou_list:
        print("Testing for IoU %f" % iou)
        dmap_list.append(
            getLocMAP(seg, iou, annotation_path, args, multi=multi, factor=factor)
        )
    return dmap_list, iou_list
|
evaluation/eval.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from torch.autograd import Variable
|
| 5 |
+
|
| 6 |
+
from .classificationMAP import getClassificationMAP as cmAP
|
| 7 |
+
from .detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 8 |
+
from .detectionMAP import getTwoStreamDetectionMAP as dtmAP
|
| 9 |
+
from .utils import write_results_to_eval_file, write_results_to_file
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def ss_eval(epoch, dataloader, args, logger, model, device):
    """Single-stream evaluation: classification mAP and detection mAP.

    Runs *model* over every sample in *dataloader* (batch size assumed 1 —
    outputs are squeezed on axis 0; TODO confirm), collects video- and
    frame-level predictions, then computes classification mAP and detection
    mAP, logs them, and appends them to the results file.
    """
    vid_preds = []
    frm_preds = []
    vid_lens = []
    labels = []

    for num, sample in enumerate(dataloader):
        if (num + 1) % 100 == 0:
            print("Testing test data point %d of %d" % (num + 1, len(dataloader)))

        features = sample["data"].numpy()
        label = sample["labels"].numpy()
        vid_len = sample["vid_len"].numpy()

        features = torch.from_numpy(features).float().to(device)

        with torch.no_grad():
            # Model is assumed to return (_, video scores, _, frame scores).
            _, vid_pred, _, frm_scr = model(Variable(features))
            frm_pred = F.softmax(frm_scr, -1)
            vid_pred = np.squeeze(vid_pred.cpu().data.numpy(), axis=0)
            frm_pred = np.squeeze(frm_pred.cpu().data.numpy(), axis=0)
            label = np.squeeze(label, axis=0)

        vid_preds.append(vid_pred)
        frm_preds.append(frm_pred)
        vid_lens.append(vid_len)
        labels.append(label)

    vid_preds = np.array(vid_preds)
    frm_preds = np.array(frm_preds)
    vid_lens = np.array(vid_lens)
    labels = np.array(labels)

    # Classification mAP over video-level predictions.
    cmap = cmAP(vid_preds, labels)
    # Detection mAP per IoU threshold over frame-level predictions.
    dmap, iou = dsmAP(
        vid_preds, frm_preds, vid_lens, dataloader.dataset.path_to_annotations, args
    )

    print("Classification map %f" % cmap)
    for item in list(zip(iou, dmap)):
        print("Detection map @ %f = %f" % (item[0], item[1]))

    logger.log_value("Test Classification mAP", cmap, epoch)
    for item in list(zip(dmap, iou)):
        logger.log_value("Test Detection1 mAP @ IoU = " + str(item[1]), item[0], epoch)

    write_results_to_file(args, dmap, cmap, epoch)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def ts_eval(dataloader, args, logger, rgb_model, flow_model, device):
    """Two-stream (RGB + optical flow) evaluation.

    Runs both models over every sample (batch size assumed 1 — outputs are
    squeezed on axis 0; TODO confirm), computes the fused detection mAP via
    dtmAP, prints the per-IoU results and their average, and appends the
    results to the eval log file.

    Fix vs. original: the running total was named `sum`, shadowing the
    builtin — renamed to `total`.
    """
    rgb_vid_preds = []
    rgb_frame_preds = []
    flow_vid_preds = []
    flow_frame_preds = []
    vid_lens = []
    labels = []

    for num, sample in enumerate(dataloader):
        if (num + 1) % 100 == 0:
            print("Testing test data point %d of %d" % (num + 1, len(dataloader)))

        rgb_features = sample["rgb_data"].numpy()
        flow_features = sample["flow_data"].numpy()
        label = sample["labels"].numpy()
        vid_len = sample["vid_len"].numpy()

        rgb_features_inp = torch.from_numpy(rgb_features).float().to(device)
        flow_features_inp = torch.from_numpy(flow_features).float().to(device)

        with torch.no_grad():
            # Each model is assumed to return (_, video scores, _, frame scores).
            _, rgb_video_pred, _, rgb_frame_scr = rgb_model(Variable(rgb_features_inp))
            _, flow_video_pred, _, flow_frame_scr = flow_model(
                Variable(flow_features_inp)
            )

            rgb_frame_pred = F.softmax(rgb_frame_scr, -1)
            flow_frame_pred = F.softmax(flow_frame_scr, -1)

            rgb_frame_pred = np.squeeze(rgb_frame_pred.cpu().data.numpy(), axis=0)
            flow_frame_pred = np.squeeze(flow_frame_pred.cpu().data.numpy(), axis=0)
            rgb_video_pred = np.squeeze(rgb_video_pred.cpu().data.numpy(), axis=0)
            flow_video_pred = np.squeeze(flow_video_pred.cpu().data.numpy(), axis=0)
            label = np.squeeze(label, axis=0)

        rgb_vid_preds.append(rgb_video_pred)
        rgb_frame_preds.append(rgb_frame_pred)
        flow_vid_preds.append(flow_video_pred)
        flow_frame_preds.append(flow_frame_pred)
        vid_lens.append(vid_len)
        labels.append(label)

    rgb_vid_preds = np.array(rgb_vid_preds)
    rgb_frame_preds = np.array(rgb_frame_preds)
    flow_vid_preds = np.array(flow_vid_preds)
    flow_frame_preds = np.array(flow_frame_preds)
    vid_lens = np.array(vid_lens)
    labels = np.array(labels)

    dmap, iou = dtmAP(
        rgb_vid_preds,
        flow_vid_preds,
        rgb_frame_preds,
        flow_frame_preds,
        vid_lens,
        dataloader.dataset.path_to_annotations,
        args,
    )

    # Average the detection mAP over (at most) the first 7 IoU thresholds.
    total = 0
    count = 0
    for iou_th, dmap_value in zip(iou, dmap):
        print("Detection map @ %f = %f" % (iou_th, dmap_value))
        if count < 7:
            total = total + dmap_value
            count += 1

    print("average map = %f" % (total / count))
    write_results_to_eval_file(args, dmap, args.rgb_load_epoch, args.flow_load_epoch)
|
evaluation/utils.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def str2ind(categoryname, classlist):
    """Index of *categoryname* within a list of UTF-8 byte-string class names.

    Raises IndexError when the category is absent (same as the original).
    """
    matches = [
        idx
        for idx, name in enumerate(classlist)
        if name.decode("utf-8") == categoryname
    ]
    return matches[0]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def strlist2indlist(strlist, classlist):
    """Map every category name in *strlist* to its index in *classlist*."""
    indices = []
    for category in strlist:
        indices.append(str2ind(category, classlist))
    return indices
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def strlist2multihot(strlist, classlist):
    """Multi-hot vector over *classlist* for the categories named in *strlist*."""
    one_hots = np.eye(len(classlist))[strlist2indlist(strlist, classlist)]
    return one_hots.sum(axis=0)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def idx2multihot(id_list, num_class):
    """Multi-hot encoding (length *num_class*) of the class indices in *id_list*.

    Repeated indices accumulate, matching the original summation behavior.
    """
    identity = np.eye(num_class)
    return identity[id_list].sum(axis=0)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def write_results_to_eval_file(args, dmap, itr1, itr2):
    """Append one line of detection-mAP results to the eval log.

    Line format: "<itr1> <itr2> <dmap values to 2 decimals...>".
    The target folder ./ckpt/<dataset>/eval/ must already exist.

    Fix vs. original: the file handle was opened without a context manager and
    leaked if the write raised — now uses `with`.
    """
    file_folder = "./ckpt/" + args.dataset_name + "/eval/"
    file_name = args.dataset_name + "-results.log"
    string_to_write = str(itr1)
    string_to_write += " " + str(itr2)
    for item in dmap:
        string_to_write += " " + "%.2f" % item
    with open(file_folder + file_name, "a+") as fid:
        fid.write(string_to_write + "\n")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def write_results_to_file(args, dmap, cmap, itr):
|
| 37 |
+
file_folder = "./ckpt/" + args.dataset_name + "/" + str(args.model_id) + "/"
|
| 38 |
+
file_name = args.dataset_name + "-results.log"
|
| 39 |
+
fid = open(file_folder + file_name, "a+")
|
| 40 |
+
string_to_write = str(itr)
|
| 41 |
+
for item in dmap:
|
| 42 |
+
string_to_write += " " + "%.2f" % item
|
| 43 |
+
string_to_write += " " + "%.2f" % cmap
|
| 44 |
+
fid.write(string_to_write + "\n")
|
| 45 |
+
fid.close()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def write_settings_to_file(args):
    """Append every attribute of *args* (one "name: value" per line) to the log,
    framed by '#'*80 above and '*'*80 below.

    The target folder ./ckpt/<dataset>/<model_id>/ must already exist.

    Fix vs. original: the file handle was opened without a context manager and
    leaked if the write raised — now uses `with`.
    """
    file_folder = "./ckpt/" + args.dataset_name + "/" + str(args.model_id) + "/"
    file_name = args.dataset_name + "-results.log"
    string_to_write = "#" * 80 + "\n"
    for arg in vars(args):
        string_to_write += str(arg) + ": " + str(getattr(args, arg)) + "\n"
    string_to_write += "*" * 80 + "\n"
    with open(file_folder + file_name, "a+") as fid:
        fid.write(string_to_write)
|
feeders/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from . import feeder
|
feeders/feeder.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
# -*- coding: utf-8 -*-
|
| 4 |
+
#
|
| 5 |
+
# Adapted from https://github.com/lshiwjx/2s-AGCN for BABEL (https://babel.is.tue.mpg.de/)
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import math
|
| 9 |
+
import os
|
| 10 |
+
import os.path as osp
|
| 11 |
+
import pdb
|
| 12 |
+
import pickle
|
| 13 |
+
import random
|
| 14 |
+
import shutil
|
| 15 |
+
import subprocess
|
| 16 |
+
import sys
|
| 17 |
+
import uuid
|
| 18 |
+
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
import numpy as np
|
| 21 |
+
import torch
|
| 22 |
+
from feeders import tools
|
| 23 |
+
from torch.utils.data import Dataset
|
| 24 |
+
|
| 25 |
+
sys.path.extend(["../"])
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class Feeder(Dataset):
|
| 29 |
+
def __init__(
|
| 30 |
+
self,
|
| 31 |
+
data_path,
|
| 32 |
+
random_choose=False,
|
| 33 |
+
random_shift=False,
|
| 34 |
+
random_move=False,
|
| 35 |
+
window_size=-1,
|
| 36 |
+
debug=False,
|
| 37 |
+
use_mmap=True,
|
| 38 |
+
frame_pad=False,
|
| 39 |
+
nb_class=3,
|
| 40 |
+
):
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
:param data_path:
|
| 44 |
+
:param label_path:
|
| 45 |
+
:param random_choose: If true, randomly choose a portion of the input sequence
|
| 46 |
+
:param random_shift: If true, randomly pad zeros at the begining or end of sequence
|
| 47 |
+
:param random_move:
|
| 48 |
+
:param window_size: The length of the output sequence
|
| 49 |
+
:param normalization: If true, normalize input sequence
|
| 50 |
+
:param debug: If true, only use the first 100 samples
|
| 51 |
+
:param use_mmap: If true, use mmap mode to load data, which can save the running memory
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
self.debug = debug
|
| 55 |
+
self.data_path = data_path
|
| 56 |
+
self.random_choose = random_choose
|
| 57 |
+
self.random_shift = random_shift
|
| 58 |
+
self.random_move = random_move
|
| 59 |
+
self.window_size = window_size
|
| 60 |
+
self.use_mmap = use_mmap
|
| 61 |
+
self.nb_class = nb_class
|
| 62 |
+
self.frame_pad = frame_pad
|
| 63 |
+
self.load_data()
|
| 64 |
+
self.count = 0
|
| 65 |
+
for i in range(len(self.data["X"])):
|
| 66 |
+
assert self.data["L"][i].shape[0] == self.data["X"][i].shape[1]
|
| 67 |
+
|
| 68 |
+
self.prediction = [
|
| 69 |
+
np.zeros((item.shape[0], 10, self.nb_class + 1), dtype=np.float32)
|
| 70 |
+
for item in self.data["L"]
|
| 71 |
+
]
|
| 72 |
+
self.soft_labels = [
|
| 73 |
+
np.zeros((item.shape[0], self.nb_class + 1), dtype=np.float32)
|
| 74 |
+
for item in self.data["L"]
|
| 75 |
+
]
|
| 76 |
+
|
| 77 |
+
def load_data(self):
|
| 78 |
+
# data: N, C, T, V, M
|
| 79 |
+
# load data
|
| 80 |
+
try:
|
| 81 |
+
with open(self.data_path) as f:
|
| 82 |
+
self.data = pickle.load(f)
|
| 83 |
+
except:
|
| 84 |
+
# for pickle file from python2
|
| 85 |
+
with open(self.data_path, "rb") as f:
|
| 86 |
+
self.data = pickle.load(f, encoding="latin1")
|
| 87 |
+
|
| 88 |
+
def label_update(self, results, indexs):
|
| 89 |
+
self.count += 1
|
| 90 |
+
|
| 91 |
+
# While updating the noisy label y_i by the probability s, we used the average output probability of the network of the past 10 epochs as s.
|
| 92 |
+
idx = (self.count - 1) % 10
|
| 93 |
+
|
| 94 |
+
for ind, res in zip(indexs, results):
|
| 95 |
+
self.prediction[ind][:, idx, :] = res
|
| 96 |
+
|
| 97 |
+
for i in range(len(self.prediction)):
|
| 98 |
+
self.soft_labels[i] = self.prediction[i].mean(axis=1)
|
| 99 |
+
|
| 100 |
+
def __len__(self):
|
| 101 |
+
return len(self.data["X"])
|
| 102 |
+
|
| 103 |
+
def __iter__(self):
|
| 104 |
+
return self
|
| 105 |
+
|
| 106 |
+
def __getitem__(self, index):
    """Return one training sample.

    :param index: dataset index.
    :return: tuple of
        data_numpy: (C, T, V, M) joints read from the PKL; time-padded only
            when self.frame_pad is set
        label: multi-hot video-level label, shape (nb_class,)
        gt: per-frame action label array
        mask: all-ones array shaped like gt
        index: the dataset index passed in (used by label_update)
        frame_label: per-frame soft labels accumulated over past epochs
    """
    data_numpy = self.data["X"][index]
    data_numpy = np.array(data_numpy)

    # multi-hot encode the (possibly multiple) video-level labels
    label = self.data["Y"][index]
    label_np = np.zeros(self.nb_class)
    for item in label:
        label_np[item] = 1
    label = np.array(label_np)

    # frame-level ground-truth action labels
    gt = self.data["L"][index]
    gt = np.array(gt)

    # optional temporal augmentations
    if self.random_shift:
        data_numpy = tools.random_shift(data_numpy)
    if self.random_choose:
        data_numpy = tools.random_choose(data_numpy, self.window_size)
    elif self.window_size > 0:
        data_numpy = tools.auto_pading(data_numpy, self.window_size)
    if self.random_move:
        data_numpy = tools.random_move(data_numpy)

    # zero-pad T up to the next multiple of 15 — presumably the model's
    # temporal downsampling factor; TODO confirm. NOTE(review): gt/mask
    # are NOT padded to the same length here.
    if self.frame_pad:
        C, T, V, M = data_numpy.shape
        if T % 15 != 0:
            new_T = T + 15 - T % 15

            data_numpy_paded = np.zeros((C, new_T, V, M))
            data_numpy_paded[:, :T, :, :] = data_numpy

            data_numpy = data_numpy_paded

    mask = np.ones_like(gt)

    frame_label = self.soft_labels[index]

    return data_numpy, label, gt, mask, index, frame_label
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def import_class(name):
    """Resolve a dotted path such as "graph.ntu_rgb_d.Graph" to the object it names."""
    module_name, *attrs = name.split(".")
    obj = __import__(module_name)
    for attr in attrs:
        obj = getattr(obj, attr)
    return obj
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def test(
    dataset,
    preds=None,
    th=None,
    idx=None,
    graph="graph.ntu_rgb_d.Graph",
    is_3d=True,
    folder_p="viz",
    label_json="prepare/configs/action_label_split1.json",
):
    """Visualize one sample frame by frame with matplotlib and write a video.

    :param dataset: feeder whose __getitem__ yields (data, label, gt, ...).
    :param preds: optional per-frame class scores, indexed as preds[t, c],
        overlaid on each frame.
    :param th: unused; kept for backward compatibility of the signature.
    :param idx: index of the sample to visualize.
    :param graph: dotted path of the skeleton Graph class; None plots
        unconnected joints instead of bones.
    :param is_3d: when visualizing NTU (3D joints), set it True.
    :param folder_p: output folder; frames go to folder_p/frames/<t>.jpg and
        the video to folder_p/<idx>.mp4.
    :param label_json: JSON mapping action name -> class index.
    """

    with open(label_json) as infile:
        jc = json.load(infile)

    # invert name -> index into index -> name, plus a catch-all class
    idx2act = {v: k for k, v in jc.items()}
    idx2act[len(idx2act)] = "other"

    # start from a clean frame directory
    if osp.exists(osp.join(folder_p, "frames")):
        shutil.rmtree(osp.join(folder_p, "frames"))
    os.makedirs(osp.join(folder_p, "frames"))

    # BUGFIX: Feeder.__getitem__ returns six values
    # (data, label, gt, mask, index, frame_label); the old 4-way unpack
    # raised "too many values to unpack". Star-unpack the remainder.
    data, label, gt, *_ = dataset[idx]
    data = data.reshape((1,) + data.shape)

    N, C, T, V, M = data.shape

    plt.ion()
    fig = plt.figure()
    if is_3d:
        # import registers the "3d" projection with matplotlib
        from mpl_toolkits.mplot3d import Axes3D

        ax = fig.add_subplot(111, projection="3d")
    else:
        ax = fig.add_subplot(111)

    if graph is None:
        # no skeleton topology given: scatter the joints per person
        p_type = ["b.", "g.", "r.", "c.", "m.", "y.", "k.", "k.", "k.", "k."]
        pose = [ax.plot(np.zeros(V), np.zeros(V), p_type[m])[0] for m in range(M)]
        ax.axis([-1, 1, -1, 1])
        for t in range(T):
            for m in range(M):
                pose[m].set_xdata(data[0, 0, t, :, m])
                pose[m].set_ydata(data[0, 1, t, :, m])
            fig.canvas.draw()
            plt.pause(0.001)
    else:
        p_type = ["b-", "g-", "r-", "c-", "m-", "y-", "k-", "k-", "k-", "k-"]
        import sys
        from os import path

        sys.path.append(
            path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
        )
        G = import_class(graph)()
        edge = G.inward
        # one polyline per bone per person
        pose = []
        for m in range(M):
            a = []
            for i in range(len(edge)):
                if is_3d:
                    a.append(ax.plot(np.zeros(3), np.zeros(3), p_type[m])[0])
                else:
                    a.append(ax.plot(np.zeros(2), np.zeros(2), p_type[m])[0])
            pose.append(a)
        ax.axis([-1, 1, -1, 1])
        if is_3d:
            ax.set_zlim3d(-1, 1)
        for t in range(T):
            for m in range(M):
                for i, (v1, v2) in enumerate(edge):
                    x1 = data[0, :2, t, v1, m]
                    x2 = data[0, :2, t, v2, m]
                    # skip bones whose endpoints are both missing (all-zero)
                    if (x1.sum() != 0 and x2.sum() != 0) or v1 == 1 or v2 == 1:
                        pose[m][i].set_xdata(data[0, 0, t, [v1, v2], m])
                        pose[m][i].set_ydata(data[0, 1, t, [v1, v2], m])
                        if is_3d:
                            pose[m][i].set_3d_properties(data[0, 2, t, [v1, v2], m])

            # NOTE(review): int(label) assumes a scalar label; the feeder in
            # this repo returns a multi-hot vector — confirm before relying
            # on the overlay text.
            if gt[t]:
                text = ax.text2D(
                    0.1, 0.9, idx2act[int(label)], size=20, transform=ax.transAxes
                )

            if preds is not None:
                pred_idx = preds[t].argmax()
                text_pred = ax.text2D(
                    0.6,
                    0.9,
                    idx2act[int(pred_idx)] + f": {preds[t, pred_idx]:.2f}",
                    size=20,
                    transform=ax.transAxes,
                )

            fig.canvas.draw()
            plt.savefig(osp.join(folder_p, "frames", str(t) + ".jpg"), dpi=300)
            if gt[t]:
                text.remove()
            if preds is not None:
                text_pred.remove()

    write_vid_from_imgs(folder_p, idx)
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def write_vid_from_imgs(folder_p, fname, fps=30):
    """Collate frames into a video with ffmpeg, then delete the frame dir.

    Args:
        folder_p (str): Frame images are in the path: folder_p/frames/<int>.jpg
        fname: Basename (without extension) of the output video.
        fps (float): Output frame rate.

    Returns:
        None. Output video is stored in the path: folder_p/<fname>.mp4
    """
    vid_p = osp.join(folder_p, f"{fname}.mp4")
    cmd = [
        "ffmpeg",
        "-r",
        str(int(fps)),
        "-i",
        osp.join(folder_p, "frames", "%d.jpg"),
        "-y",
        vid_p,
    ]
    # subprocess.DEVNULL fixes the file-handle leak of the previous
    # never-closed open(os.devnull, "w").
    retcode = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    if retcode != 0:
        print(
            "*******ValueError(Error {0} executing command: {1}*********".format(
                retcode, " ".join(cmd)
            )
        )
    shutil.rmtree(osp.join(folder_p, "frames"))
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
if __name__ == "__main__":
    import os

    # Forward X11 rendering over SSH; adjust/remove for a local display.
    os.environ["DISPLAY"] = "localhost:10.0"
    data_path = "dataset/processed_data/train_split1.pkl"
    graph = "graph.ntu_rgb_d.Graph"
    dataset = Feeder(data_path)
    # Visualize the first sample of split 1 in 3D.
    test(dataset, idx=0, graph=graph, is_3d=True)
|
feeders/tools.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn.utils.rnn import pad_sequence
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def downsample(data_numpy, step, random_sample=True):
    """Keep every `step`-th frame of a (C, T, V, M) sequence.

    With random_sample=True the stride phase is drawn uniformly from
    [0, step); otherwise sampling starts at frame 0.
    """
    offset = np.random.randint(step) if random_sample else 0
    return data_numpy[:, offset::step, :, :]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def temporal_slice(data_numpy, step):
    """Fold consecutive groups of `step` frames into the person axis.

    Input (C, T, V, M) -> output (C, T // step, V, step * M). T must be a
    multiple of `step`.
    """
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    # T // step: integer division — the old `T / step` produced a float
    # shape, which raises TypeError in NumPy reshape on Python 3.
    return (
        data_numpy.reshape(C, T // step, step, V, M)
        .transpose((0, 1, 3, 2, 4))
        .reshape(C, T // step, V, step * M)
    )
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def mean_subtractor(data_numpy, mean):
    """Subtract a scalar `mean` from all frames up to the last valid one.

    A frame is "valid" when any of its entries is non-zero; only frames
    before the last valid frame are shifted (the subtraction is in place).
    Returns None when mean == 0, matching the historical contract.
    """
    # input: C,T,V,M
    if mean == 0:
        return
    C, T, V, M = data_numpy.shape
    # per-frame validity: any non-zero entry across C, V and M
    valid = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    last = len(valid) - valid[::-1].argmax()
    data_numpy[:, :last, :, :] = data_numpy[:, :last, :, :] - mean
    return data_numpy
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def auto_pading(data_numpy, size, random_pad=False):
    """Zero-pad a (C, T, V, M) sequence along time up to length `size`.

    If T >= size the input is returned untouched. With random_pad=True the
    original frames are placed at a random offset inside the padding.
    """
    C, T, V, M = data_numpy.shape
    if T >= size:
        return data_numpy
    offset = random.randint(0, size - T) if random_pad else 0
    padded = np.zeros((C, size, V, M))
    padded[:, offset : offset + T, :, :] = data_numpy
    return padded
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def random_choose(data_numpy, size, auto_pad=True):
    """Crop a random temporal window of length `size` from (C, T, V, M) data.

    A clip shorter than `size` is zero-padded at a random offset when
    auto_pad is True, otherwise returned as-is. Note: all-zero frames may
    end up inside the chosen window.
    """
    C, T, V, M = data_numpy.shape
    if T == size:
        return data_numpy
    if T < size:
        if not auto_pad:
            return data_numpy
        return auto_pading(data_numpy, size, random_pad=True)
    start = random.randint(0, T - size)
    return data_numpy[:, start : start + size, :, :]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def random_move(
    data_numpy,
    angle_candidate=(-10.0, -5.0, 0.0, 5.0, 10.0),
    scale_candidate=(0.9, 1.0, 1.1),
    transform_candidate=(-0.2, -0.1, 0.0, 0.1, 0.2),
    move_time_candidate=(1,),
):
    """Apply a random, temporally-interpolated 2D rotation/scale/translation.

    Candidate defaults are tuples now — mutable list defaults are shared
    between calls, a classic Python pitfall — with identical values.

    Args:
        data_numpy: (C, T, V, M) array; channels 0 and 1 (x, y) are
            transformed IN PLACE, other channels are untouched.
        angle_candidate: rotation angles (degrees) sampled per key node.
        scale_candidate: scale factors sampled per key node.
        transform_candidate: x/y translations sampled per key node.
        move_time_candidate: number of interpolation segments.

    Returns:
        The same (mutated) data_numpy array.
    """
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    # key frames between which the transform parameters are interpolated
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)

    A = np.random.choice(angle_candidate, num_node)
    S = np.random.choice(scale_candidate, num_node)
    T_x = np.random.choice(transform_candidate, num_node)
    T_y = np.random.choice(transform_candidate, num_node)

    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)

    # linearly interpolate parameters between consecutive key frames
    for i in range(num_node - 1):
        a[node[i] : node[i + 1]] = (
            np.linspace(A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
        )
        s[node[i] : node[i + 1]] = np.linspace(S[i], S[i + 1], node[i + 1] - node[i])
        t_x[node[i] : node[i + 1]] = np.linspace(
            T_x[i], T_x[i + 1], node[i + 1] - node[i]
        )
        t_y[node[i] : node[i + 1]] = np.linspace(
            T_y[i], T_y[i + 1], node[i + 1] - node[i]
        )

    # per-frame 2x2 rotation+scale matrices
    theta = np.array(
        [[np.cos(a) * s, -np.sin(a) * s], [np.sin(a) * s, np.cos(a) * s]]
    )

    # rotate/scale then translate the (x, y) channels of every frame
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]  # translation
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)

    return data_numpy
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def random_shift(data_numpy):
    """Relocate the valid (non-zero) frame span to a random temporal offset.

    The contiguous block of frames containing non-zero data is copied into
    a fresh zero array of the same (C, T, V, M) shape at a random position.
    """
    C, T, V, M = data_numpy.shape
    shifted = np.zeros(data_numpy.shape)
    # frames with any non-zero entry count as valid
    valid = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    start = valid.argmax()
    stop = len(valid) - valid[::-1].argmax()

    span = stop - start
    offset = random.randint(0, T - span)
    shifted[:, offset : offset + span, :, :] = data_numpy[:, start:stop, :, :]

    return shifted
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def openpose_match(data_numpy):
    """Re-associate pose detections across frames so each person slot is
    temporally consistent, then sort people by overall confidence.

    Expects (C=3, T, V, M) where channel 2 holds per-joint confidence
    scores (OpenPose output convention — TODO confirm with the producer).
    """
    C, T, V, M = data_numpy.shape
    assert C == 3
    # per-frame, per-person total confidence
    score = data_numpy[2, :, :, :].sum(axis=1)
    # the rank of body confidence in each frame (shape: T-1, M)
    rank = (-score[0 : T - 1]).argsort(axis=1).reshape(T - 1, M)

    # data of frame 1
    xy1 = data_numpy[0:2, 0 : T - 1, :, :].reshape(2, T - 1, V, M, 1)
    # data of frame 2
    xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M)
    # square of distance between frame 1&2 (shape: T-1, M, M)
    distance = ((xy2 - xy1) ** 2).sum(axis=2).sum(axis=0)

    # match pose: greedy nearest-neighbour assignment, most confident first
    forward_map = np.zeros((T, M), dtype=int) - 1
    forward_map[0] = range(M)
    for m in range(M):
        choose = rank == m
        forward = distance[choose].argmin(axis=1)
        for t in range(T - 1):
            # mark the chosen slot as taken for lower-ranked people
            distance[t, :, forward[t]] = np.inf
        forward_map[1:][choose] = forward
    assert np.all(forward_map >= 0)

    # string data: compose per-step maps into absolute index maps
    for t in range(T - 1):
        forward_map[t + 1] = forward_map[t + 1][forward_map[t]]

    # generate data: apply the person permutation frame by frame
    new_data_numpy = np.zeros(data_numpy.shape)
    for t in range(T):
        new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[t]].transpose(
            1, 2, 0
        )
    data_numpy = new_data_numpy

    # score sort: order people by total confidence over the whole clip
    trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0)
    rank = (-trace_score).argsort()
    data_numpy = data_numpy[:, :, :, rank]

    return data_numpy
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def pad(tensor, padding_value=0):
    """Batch-first pad a list of variable-length tensors (wraps pad_sequence)."""
    padded = pad_sequence(tensor, batch_first=True, padding_value=padding_value)
    return padded
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def collate_with_padding(batch):
    """Collate (data, target, gt, mask) samples into padded batch tensors.

    Each sample's data arrives as (C, T, V, M); sequences are zero-padded
    along T and the batch is returned as (N, C, T_max, V, M).
    """
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[1]) for item in batch]
    gt = [torch.tensor(item[2]) for item in batch]
    mask = [torch.tensor(item[3]) for item in batch]

    data = pad(data).transpose(1, 2)
    # torch.stack, not torch.tensor: constructing a tensor from a list of
    # tensors raises in current PyTorch (and matches collate_with_padding_multi).
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, target, gt, mask]
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def collate_with_padding_multi(batch):
    """Collate (data, target, gt, mask) samples; multi-hot targets are stacked.

    Data is zero-padded over time and returned as (N, C, T_max, V, M).
    """
    skeletons = [torch.tensor(sample[0].transpose(1, 0, 2, 3)) for sample in batch]
    labels = [torch.tensor(sample[1]) for sample in batch]
    frame_gts = [torch.tensor(sample[2]) for sample in batch]
    masks = [torch.tensor(sample[3]) for sample in batch]

    batched_data = pad(skeletons).transpose(1, 2)
    batched_target = torch.stack(labels)
    batched_gt = pad(frame_gts)
    batched_mask = pad(masks)
    return [batched_data, batched_target, batched_gt, batched_mask]
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def collate_with_padding_multi_velo(batch):
    """Collate (data, velo, target, gt, mask) samples with joint + velocity streams.

    Both streams arrive as (C, T, V, M), are zero-padded over time, and are
    returned as (N, C, T_max, V, M).
    """
    skeletons = [torch.tensor(sample[0].transpose(1, 0, 2, 3)) for sample in batch]
    velocities = [torch.tensor(sample[1].transpose(1, 0, 2, 3)) for sample in batch]
    labels = [torch.tensor(sample[2]) for sample in batch]
    frame_gts = [torch.tensor(sample[3]) for sample in batch]
    masks = [torch.tensor(sample[4]) for sample in batch]

    batched_data = pad(skeletons).transpose(1, 2)
    batched_velo = pad(velocities).transpose(1, 2)
    batched_target = torch.stack(labels)
    batched_gt = pad(frame_gts)
    batched_mask = pad(masks)
    return [batched_data, batched_velo, batched_target, batched_gt, batched_mask]
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def collate_with_padding_multi_joint(batch):
    """Collate (data, target, gt, mask, index, soft_label) samples.

    - data: (C, T, V, M) per sample -> (N, C, T_max, V, M), zero-padded over
      time (padding is zeros, not a repeat of the last frame).
    - gt is padded with 4 so the pad value maps onto the extra class index:
      for 4 actions the frame labels run 0..3 plus 4 for "background"/pad.
    - soft_label is padded with -100 (the conventional ignore value).
    """
    skeletons = [torch.tensor(sample[0].transpose(1, 0, 2, 3)) for sample in batch]
    labels = [torch.tensor(sample[1]) for sample in batch]  # video-level label
    frame_gts = [torch.tensor(sample[2]) for sample in batch]  # frame-level label
    masks = [torch.tensor(sample[3]) for sample in batch]
    indices = [torch.tensor(sample[4]) for sample in batch]
    soft_labels = [torch.tensor(sample[5]) for sample in batch]

    data = pad(skeletons).transpose(1, 2)
    target = torch.stack(labels)
    gt = pad(frame_gts, padding_value=4)
    mask = pad(masks)
    index = torch.tensor(indices)
    soft_label = pad(soft_labels, padding_value=-100)
    return [data, target, gt, mask, index, soft_label]
|
graph/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from . import kinetics, ntu_rgb_d, tools,ntu_rgb_d_infogcn, nturgbd_blockgcn
|
graph/kinetics.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
|
| 3 |
+
import networkx as nx
|
| 4 |
+
import numpy as np
|
| 5 |
+
from graph import tools
|
| 6 |
+
|
| 7 |
+
sys.path.extend(["../"])
|
| 8 |
+
|
| 9 |
+
# Joint index:
|
| 10 |
+
# {0, "Nose"}
|
| 11 |
+
# {1, "Neck"},
|
| 12 |
+
# {2, "RShoulder"},
|
| 13 |
+
# {3, "RElbow"},
|
| 14 |
+
# {4, "RWrist"},
|
| 15 |
+
# {5, "LShoulder"},
|
| 16 |
+
# {6, "LElbow"},
|
| 17 |
+
# {7, "LWrist"},
|
| 18 |
+
# {8, "RHip"},
|
| 19 |
+
# {9, "RKnee"},
|
| 20 |
+
# {10, "RAnkle"},
|
| 21 |
+
# {11, "LHip"},
|
| 22 |
+
# {12, "LKnee"},
|
| 23 |
+
# {13, "LAnkle"},
|
| 24 |
+
# {14, "REye"},
|
| 25 |
+
# {15, "LEye"},
|
| 26 |
+
# {16, "REar"},
|
| 27 |
+
# {17, "LEar"},
|
| 28 |
+
|
| 29 |
+
# Edge format: (origin, neighbor)
|
| 30 |
+
num_node = 18
|
| 31 |
+
self_link = [(i, i) for i in range(num_node)]
|
| 32 |
+
inward = [
|
| 33 |
+
(4, 3),
|
| 34 |
+
(3, 2),
|
| 35 |
+
(7, 6),
|
| 36 |
+
(6, 5),
|
| 37 |
+
(13, 12),
|
| 38 |
+
(12, 11),
|
| 39 |
+
(10, 9),
|
| 40 |
+
(9, 8),
|
| 41 |
+
(11, 5),
|
| 42 |
+
(8, 2),
|
| 43 |
+
(5, 1),
|
| 44 |
+
(2, 1),
|
| 45 |
+
(0, 1),
|
| 46 |
+
(15, 0),
|
| 47 |
+
(14, 0),
|
| 48 |
+
(17, 15),
|
| 49 |
+
(16, 14),
|
| 50 |
+
]
|
| 51 |
+
outward = [(j, i) for (i, j) in inward]
|
| 52 |
+
neighbor = inward + outward
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class Graph:
    """Spatial graph over the 18 OpenPose/Kinetics joints.

    self.A stacks the identity, normalized inward and normalized outward
    adjacency matrices (shape (3, 18, 18)) built by tools.get_spatial_graph.
    """

    def __init__(self, labeling_mode="spatial"):
        self.A = self.get_adjacency_matrix(labeling_mode)
        self.num_node = num_node
        self.self_link = self_link
        self.inward = inward
        self.outward = outward
        self.neighbor = neighbor

    def get_adjacency_matrix(self, labeling_mode=None):
        # None means "return whatever was already built"
        if labeling_mode is None:
            return self.A
        if labeling_mode != "spatial":
            raise ValueError()
        return tools.get_spatial_graph(num_node, self_link, inward, outward)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
    # Smoke test: build the spatial adjacency for the 18-joint graph.
    A = Graph("spatial").get_adjacency_matrix()
    print("")
|
graph/ntu_rgb_d.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
|
| 3 |
+
from graph import tools
|
| 4 |
+
|
| 5 |
+
sys.path.extend(["../"])
|
| 6 |
+
|
| 7 |
+
num_node = 25
|
| 8 |
+
self_link = [(i, i) for i in range(num_node)]
|
| 9 |
+
inward_ori_index = [
|
| 10 |
+
(1, 2),
|
| 11 |
+
(2, 21),
|
| 12 |
+
(3, 21),
|
| 13 |
+
(4, 3),
|
| 14 |
+
(5, 21),
|
| 15 |
+
(6, 5),
|
| 16 |
+
(7, 6),
|
| 17 |
+
(8, 7),
|
| 18 |
+
(9, 21),
|
| 19 |
+
(10, 9),
|
| 20 |
+
(11, 10),
|
| 21 |
+
(12, 11),
|
| 22 |
+
(13, 1),
|
| 23 |
+
(14, 13),
|
| 24 |
+
(15, 14),
|
| 25 |
+
(16, 15),
|
| 26 |
+
(17, 1),
|
| 27 |
+
(18, 17),
|
| 28 |
+
(19, 18),
|
| 29 |
+
(20, 19),
|
| 30 |
+
(22, 23),
|
| 31 |
+
(23, 8),
|
| 32 |
+
(24, 25),
|
| 33 |
+
(25, 12),
|
| 34 |
+
]
|
| 35 |
+
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
|
| 36 |
+
outward = [(j, i) for (i, j) in inward]
|
| 37 |
+
neighbor = inward + outward
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class Graph:
    """Spatial graph over the 25 NTU RGB+D joints.

    self.A stacks the identity, normalized inward and normalized outward
    adjacency matrices (shape (3, 25, 25)) built by tools.get_spatial_graph.
    """

    def __init__(self, labeling_mode="spatial"):
        self.A = self.get_adjacency_matrix(labeling_mode)
        self.num_node = num_node
        self.self_link = self_link
        self.inward = inward
        self.outward = outward
        self.neighbor = neighbor

    def get_adjacency_matrix(self, labeling_mode=None):
        # None means "return whatever was already built"
        if labeling_mode is None:
            return self.A
        if labeling_mode != "spatial":
            raise ValueError()
        return tools.get_spatial_graph(num_node, self_link, inward, outward)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
    import os

    import matplotlib.pyplot as plt

    # os.environ['DISPLAY'] = 'localhost:11.0'
    # Visualize each of the three partition matrices (I, inward, outward).
    A = Graph("spatial").get_adjacency_matrix()
    for i in A:
        plt.imshow(i, cmap="gray")
        plt.show()
    print(A)
|
graph/tools.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def edge2mat(link, num_node):
    """Build a (num_node, num_node) matrix with A[j, i] = 1 for each edge (i, j)."""
    A = np.zeros((num_node, num_node))
    for src, dst in link:
        A[dst, src] = 1
    return A
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def normalize_digraph(A):
    """Right-normalize A by column sums: returns A @ D^-1.

    Columns that sum to zero are left as zero rather than divided.
    """
    degree = np.sum(A, 0)
    _, width = A.shape
    D_inv = np.zeros((width, width))
    for i in range(width):
        if degree[i] > 0:
            D_inv[i, i] = degree[i] ** (-1)
    return np.dot(A, D_inv)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_spatial_graph(num_node, self_link, inward, outward):
    """Stack identity, normalized inward and normalized outward adjacency into (3, V, V)."""
    identity = edge2mat(self_link, num_node)
    a_in = normalize_digraph(edge2mat(inward, num_node))
    a_out = normalize_digraph(edge2mat(outward, num_node))
    return np.stack((identity, a_in, a_out))
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
import numpy as np
|
| 31 |
+
|
| 32 |
+
def get_sgp_mat(num_in, num_out, link):
    """Column-normalized pooling matrix: A[i, j] = 1 per (i, j) link, columns scaled to sum 1."""
    A = np.zeros((num_in, num_out))
    for src, dst in link:
        A[src, dst] = 1
    return A / np.sum(A, axis=0, keepdims=True)
|
| 38 |
+
|
| 39 |
+
def edge2mat(link, num_node):
    """Adjacency matrix with A[j, i] = 1 for every directed edge (i, j)."""
    # NOTE(review): duplicate definition — an identical edge2mat appears
    # earlier in this module; this later one silently shadows it at import.
    A = np.zeros((num_node, num_node))
    for i, j in link:
        A[j, i] = 1
    return A
|
| 44 |
+
|
| 45 |
+
def get_k_scale_graph(scale, A):
    """Binary reachability graph within `scale` hops of A.

    scale == 1 returns A unchanged; otherwise the result has a 1 wherever a
    path of length 1..scale exists.
    """
    if scale == 1:
        return A
    reach = np.zeros_like(A)
    power = np.eye(A.shape[0])
    for _ in range(scale):
        power = power @ A
        reach += power
    reach[reach > 0] = 1
    return reach
|
| 55 |
+
|
| 56 |
+
def normalize_digraph(A):
    """Right-normalize A by column sums: A @ D^-1; zero columns stay zero."""
    # NOTE(review): duplicate of normalize_digraph defined earlier in this
    # module (identical body); this later definition is the one that wins.
    Dl = np.sum(A, 0)
    h, w = A.shape
    Dn = np.zeros((w, w))
    for i in range(w):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i] ** (-1)
    AD = np.dot(A, Dn)
    return AD
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def get_spatial_graph(num_node, self_link, inward, outward):
    """Stack (I, normalized inward, normalized outward) into a (3, V, V) tensor."""
    # NOTE(review): duplicate of get_spatial_graph defined earlier in this module.
    I = edge2mat(self_link, num_node)
    In = normalize_digraph(edge2mat(inward, num_node))
    Out = normalize_digraph(edge2mat(outward, num_node))
    A = np.stack((I, In, Out))
    return A
|
| 73 |
+
|
| 74 |
+
def normalize_adjacency_matrix(A):
    """Symmetrically normalize A: D^-1/2 @ A @ D^-1/2, returned as float32.

    Zero-degree nodes are mapped to zero rows/columns; previously
    np.power(0, -0.5) emitted a divide warning and produced inf entries.
    """
    node_degrees = A.sum(-1)
    degs_inv_sqrt = np.zeros_like(node_degrees, dtype=float)
    nonzero = node_degrees > 0
    degs_inv_sqrt[nonzero] = np.power(node_degrees[nonzero], -0.5)
    norm_degs_matrix = np.eye(len(node_degrees)) * degs_inv_sqrt
    return (norm_degs_matrix @ A @ norm_degs_matrix).astype(np.float32)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def k_adjacency(A, k, with_self=False, self_factor=1):
    """Exactly-k-hop adjacency: nodes reachable in k hops but not in k - 1.

    Computed from binarized powers of (A + I); k == 0 yields the identity.
    with_self adds self_factor * I to the result.
    """
    assert isinstance(A, np.ndarray)
    identity = np.eye(len(A), dtype=A.dtype)
    if k == 0:
        return identity
    within_k = np.minimum(np.linalg.matrix_power(A + identity, k), 1)
    within_k_minus_1 = np.minimum(np.linalg.matrix_power(A + identity, k - 1), 1)
    Ak = within_k - within_k_minus_1
    if with_self:
        Ak = Ak + self_factor * identity
    return Ak
|
| 91 |
+
|
| 92 |
+
def get_multiscale_spatial_graph(num_node, self_link, inward, outward):
    """Stack (I, inward, outward, 2-hop inward, 2-hop outward), each column-normalized."""
    identity = edge2mat(self_link, num_node)
    a_in = edge2mat(inward, num_node)
    a_out = edge2mat(outward, num_node)
    a_in_2hop = k_adjacency(a_in, 2)
    a_out_2hop = k_adjacency(a_out, 2)
    return np.stack(
        (
            identity,
            normalize_digraph(a_in),
            normalize_digraph(a_out),
            normalize_digraph(a_in_2hop),
            normalize_digraph(a_out_2hop),
        )
    )
|
| 104 |
+
|
| 105 |
+
def get_adjacency_matrix(edges, num_nodes):
    """Binary float32 (num_nodes, num_nodes) matrix with A[edge] = 1 per edge tuple."""
    A = np.zeros((num_nodes, num_nodes), dtype=np.float32)
    for e in edges:
        A[e] = 1.0
    return A
|
| 110 |
+
|
| 111 |
+
def get_uniform_graph(num_node, self_link, neighbor):
    """Single column-normalized adjacency over neighbor edges plus self-loops."""
    return normalize_digraph(edge2mat(neighbor + self_link, num_node))
|
huggingface.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import upload_folder

# One-shot script: push the local working directory to the Hugging Face Hub
# as a model repository.
upload_folder(
    folder_path="/root/autodl-tmp/workshop2",
    repo_id="qiushuocheng/workshop",
    repo_type="model",  # or "dataset", "space"
    path_in_repo="",  # optional subdirectory in repo
    commit_message="Initial upload"
)
|
human_model/Put SMPLH model here.txt
ADDED
|
File without changes
|
model/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from . import agcn
|
| 2 |
+
from . import agcn_Unet
|
| 3 |
+
from . import agcn_concat
|
| 4 |
+
from . import agcn_MSFF
|
| 5 |
+
from . import blockgcn_MSFF
|
| 6 |
+
|
| 7 |
+
from . import blockgcn
|
model/agcn.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import math
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
import torch
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
import torch.nn.functional as F
|
| 20 |
+
from einops import rearrange, reduce, repeat
|
| 21 |
+
from torch.autograd import Variable
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def import_class(name):
    """Import and return the object named by a dotted path (e.g. "model.agcn.Model")."""
    parts = name.split(".")
    obj = __import__(parts[0])
    for part in parts[1:]:
        obj = getattr(obj, part)
    return obj
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def conv_branch_init(conv, branches):
    """Initialize one branch conv: zero bias, normal weights with std scaled
    by the total fan across all `branches` parallel branches."""
    weight = conv.weight
    out_channels = weight.size(0)
    k1 = weight.size(1)
    k2 = weight.size(2)
    std = math.sqrt(2.0 / (out_channels * k1 * k2 * branches))
    nn.init.normal_(weight, 0, std)
    nn.init.constant_(conv.bias, 0)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def conv_init(conv):
    """Kaiming-normal (fan_out) init for a conv's weight; zero its bias.

    Args:
        conv: a Conv2d-like module with `.weight` (and optionally `.bias`).
    """
    nn.init.kaiming_normal_(conv.weight, mode="fan_out")
    # Fix: guard against Conv2d(..., bias=False), where conv.bias is None.
    if conv.bias is not None:
        nn.init.constant_(conv.bias, 0)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def bn_init(bn, scale):
    """Initialise BatchNorm affine parameters: weight = scale, bias = 0."""
    for param, value in ((bn.weight, scale), (bn.bias, 0)):
        nn.init.constant_(param, value)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class unit_tcn(nn.Module):
    """Temporal convolution unit: a (kernel_size, 1) conv along the time
    axis followed by BatchNorm.

    Note: `self.relu` is created for API parity with the rest of the file
    but is intentionally not applied in `forward`.
    """

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super(unit_tcn, self).__init__()
        pad = (kernel_size - 1) // 2  # 'same' padding along time for odd kernels
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, 1),
            padding=(pad, 0),
            stride=(stride, 1),
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        conv_init(self.conv)
        bn_init(self.bn, 1)

    def forward(self, x):
        """Apply temporal conv then BN; no activation here."""
        return self.bn(self.conv(x))
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class unit_gcn(nn.Module):
    """Adaptive spatial graph convolution unit (2s-AGCN style).

    For each of `num_subset` adjacency subsets, combines:
      - the fixed graph adjacency A (non-trainable),
      - a learned global offset PA (shared across samples),
      - a data-dependent attention adjacency computed from embedded features,
    then applies a 1x1 conv per subset and sums the results, with a
    residual ("down") path and BatchNorm + ReLU on top.
    """

    def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3):
        super(unit_gcn, self).__init__()
        # Embedding width used for the attention adjacency (conv_a / conv_b).
        inter_channels = out_channels // coff_embedding
        self.inter_c = inter_channels
        # Learned adjacency offset; created from A for shape, then
        # re-initialised to a small constant so training starts near A alone.
        self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
        nn.init.constant_(self.PA, 1e-6)
        # Fixed (non-trainable) adjacency. NOTE(review): Variable is a
        # deprecated no-op wrapper in modern PyTorch; a buffer would be
        # the contemporary equivalent.
        self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)
        self.num_subset = num_subset

        # Per-subset 1x1 convs: a/b embed features for attention, d projects
        # the aggregated features to out_channels.
        self.conv_a = nn.ModuleList()
        self.conv_b = nn.ModuleList()
        self.conv_d = nn.ModuleList()
        for i in range(self.num_subset):
            self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
            self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
            self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))

        # Residual path: 1x1 conv + BN when channel counts differ, identity otherwise.
        if in_channels != out_channels:
            self.down = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1), nn.BatchNorm2d(out_channels)
            )
        else:
            self.down = lambda x: x

        self.bn = nn.BatchNorm2d(out_channels)
        # Softmax over dim=-2 normalises attention over source vertices.
        self.soft = nn.Softmax(-2)
        self.relu = nn.ReLU()

        # Standard inits for all convs/BNs ...
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2d):
                bn_init(m, 1)
        # ... but the output BN starts near zero so the unit begins as
        # (almost) a pure residual.
        bn_init(self.bn, 1e-6)
        for i in range(self.num_subset):
            conv_branch_init(self.conv_d[i], self.num_subset)

    def forward(self, x):
        # x: (N, C, T, V) — batch, channels, time, vertices (joints).
        N, C, T, V = x.size()
        A = self.A
        # Move the fixed adjacency to the input's GPU when applicable
        # (get_device() returns -1 for CPU tensors).
        if -1 != x.get_device():
            A = A.cuda(x.get_device())
        # Fixed adjacency plus learned offset.
        A = A + self.PA

        y = None
        for i in range(self.num_subset):
            # Attention adjacency: embed, flatten (C*T), and correlate
            # vertices against each other -> (N, V, V).
            A1 = (
                self.conv_a[i](x)
                .permute(0, 3, 1, 2)
                .contiguous()
                .view(N, V, self.inter_c * T)
            )
            A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
            A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1))  # N V V
            # Combine data-dependent attention with the (offset) fixed graph.
            A1 = A1 + A[i]
            # Aggregate features over the graph and project.
            A2 = x.view(N, C * T, V)
            z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
            # Sum contributions across subsets.
            y = z + y if y is not None else z

        y = self.bn(y)
        y += self.down(x)
        return self.relu(y)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
class TCN_GCN_unit(nn.Module):
    """One ST-GCN block: spatial graph conv (unit_gcn) followed by a
    temporal conv (unit_tcn), with a residual connection and ReLU.
    """

    def __init__(self, in_channels, out_channels, A, stride=1, residual=True):
        super(TCN_GCN_unit, self).__init__()
        self.gcn1 = unit_gcn(in_channels, out_channels, A)
        self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
        self.relu = nn.ReLU()
        if not residual:
            # No skip connection at all.
            self.residual = lambda x: 0
        elif in_channels == out_channels and stride == 1:
            # Shapes match: identity skip.
            self.residual = lambda x: x
        else:
            # Project the skip path to the new channel count / stride.
            self.residual = unit_tcn(
                in_channels, out_channels, kernel_size=1, stride=stride
            )

    def forward(self, x):
        out = self.gcn1(x)
        out = self.tcn1(out)
        return self.relu(out + self.residual(x))
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class Classifier(nn.Module):
    """Cosine-similarity classification head with multi-temperature
    attention pooling (multiple-instance-learning branch).

    Frame embeddings are compared against learned per-class centers; the
    frame scores are pooled over time with class-wise softmax attention at
    several temperatures and averaged to produce video-level predictions.
    """

    def __init__(self, num_class=60, scale_factor=5.0, temperature=(1.0, 2.0, 5.0)):
        """
        Args:
            num_class: number of action classes; one extra center is kept
                (num_class + 1), presumably for a background slot —
                TODO confirm against the training labels.
            scale_factor: multiplier applied to the cosine scores.
            temperature: iterable of softmax temperatures for attention.
                (Changed from a mutable list default to a tuple; it is only
                iterated, so behavior is unchanged.)
        """
        super(Classifier, self).__init__()

        # Action class centers in a 256-d embedding space.
        self.ac_center = nn.Parameter(torch.zeros(num_class + 1, 256))
        nn.init.xavier_uniform_(self.ac_center)

        self.temperature = temperature
        self.scale_factor = scale_factor

    def forward(self, x):
        """
        Args:
            x: (n*m, c, t, v) feature map from the backbone. With n = x.size(0)
               passed to einops' reduce, m is forced to 1 (single person).
        Returns:
            mil_vid_pred: (n, num_class + 1) sigmoid video-level predictions.
            frm_scrs: (n, t, num_class + 1) scaled per-frame cosine scores.
        """
        N = x.size(0)

        # Mean over person and joint axes -> per-frame embeddings (n, t, c).
        x_emb = reduce(x, "(n m) c t v -> n t c", "mean", n=N)

        norms_emb = F.normalize(x_emb, dim=2)
        norms_ac = F.normalize(self.ac_center)

        # Generate foreground and action scores: cosine similarity of each
        # frame embedding to each class center, scaled.
        frm_scrs = (
            torch.einsum("ntd,cd->ntc", [norms_emb, norms_ac]) * self.scale_factor
        )

        # Class-wise temporal attention at each temperature (softmax over t).
        class_wise_atts = [F.softmax(frm_scrs * t, 1) for t in self.temperature]

        # Multiple instance learning branch: attention-weighted temporal
        # score aggregation, averaged across temperatures.
        mid_vid_scrs = [
            torch.einsum("ntc,ntc->nc", [frm_scrs, att]) for att in class_wise_atts
        ]
        mil_vid_scr = (
            torch.stack(mid_vid_scrs, -1).mean(-1) * 2.0
        )  # frm_scrs have been multiplied by the scale factor
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        mil_vid_pred = torch.sigmoid(mil_vid_scr)

        return mil_vid_pred, frm_scrs
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
class Model(nn.Module):
    """2s-AGCN backbone (10 TCN-GCN blocks) with two MIL classifier heads.

    The second head receives detached features, so its gradients do not
    flow back into the backbone (co-training / co-teaching setup).
    """

    def __init__(
        self,
        num_class=60,
        num_point=25,
        num_person=1,
        graph=None,
        graph_args=dict(),
        in_channels=2,
        scale_factor=5.0,
        temperature=[1.0, 2.0, 5.0],
    ):
        """
        Args:
            num_class: number of action classes for the classifier heads.
            num_point: number of skeleton joints (V).
            num_person: number of bodies per sample (M).
            graph: dotted import path of the graph class (required).
            graph_args: kwargs forwarded to the graph class.
            in_channels: channels used to size data_bn.
                NOTE(review): l1 below hardcodes 3 input channels, which
                disagrees with this default of 2 — confirm callers pass
                3-channel data (data_bn is unused in forward).
            scale_factor, temperature: forwarded to Classifier.
        """
        super(Model, self).__init__()

        if graph is None:
            raise ValueError()
        else:
            # Resolve and instantiate the skeleton graph from its dotted path.
            Graph = import_class(graph)
            self.graph = Graph(**graph_args)

        A = self.graph.A
        # Defined (and initialised) but not applied in forward — see the
        # commented-out call there.
        self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)

        # Backbone: time is halved at l2, l5 and l8.
        self.l1 = TCN_GCN_unit(3, 64, A, residual=False)  # save (B,64,25,T)
        self.l2 = TCN_GCN_unit(64, 64, A, stride=2)
        self.l3 = TCN_GCN_unit(64, 64, A)
        self.l4 = TCN_GCN_unit(64, 64, A)  # save (B,64,25,T/2)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2)
        self.l6 = TCN_GCN_unit(128, 128, A)
        self.l7 = TCN_GCN_unit(128, 128, A)  # save (B,128,25,T/4)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2)
        self.l9 = TCN_GCN_unit(256, 256, A)
        self.l10 = TCN_GCN_unit(256, 256, A)  # save (B,256,25,T/8)

        bn_init(self.data_bn, 1)

        self.classifier_1 = Classifier(num_class, scale_factor, temperature)

        self.classifier_2 = Classifier(num_class, scale_factor, temperature)

    def forward(self, x,mask):
        """
        Args:
            x: (N, C, T, V, M) skeleton sequence batch.
            mask: accepted for interface compatibility but not used in this
                implementation.
        Returns:
            (mil_vid_pred_1, frm_scrs_1, mil_vid_pred_2, frm_scrs_2):
            video-level predictions and per-frame scores from both heads;
            frame scores are upsampled back to the input length T.
        """
        N, C, T, V, M = x.size()

        # Flatten person/joint/channel dims to feed the (unused) data_bn ...
        x = rearrange(x, "n c t v m -> n (m v c) t")
        # x = self.data_bn(x)
        # ... then reshape to (n*m, c, t, v) for the graph backbone.
        x = rearrange(x, "n (m v c) t -> (n m) c t v", m=M, v=V, c=C)

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)

        mil_vid_pred_1, frm_scrs_1 = self.classifier_1(x)

        # detach(): head 2 does not backpropagate into the backbone.
        mil_vid_pred_2, frm_scrs_2 = self.classifier_2(x.detach())

        # print (frm_scrs_1.size(), T)

        # Upsample frame scores (downsampled 8x by the backbone) back to
        # the original temporal resolution T.
        frm_scrs_1 = rearrange(frm_scrs_1, "n t c -> n c t")
        frm_scrs_1 = F.interpolate(
            frm_scrs_1, size=(T), mode="linear", align_corners=True
        )
        frm_scrs_1 = rearrange(frm_scrs_1, "n c t -> n t c")

        frm_scrs_2 = rearrange(frm_scrs_2, "n t c -> n c t")
        frm_scrs_2 = F.interpolate(
            frm_scrs_2, size=(T), mode="linear", align_corners=True
        )
        frm_scrs_2 = rearrange(frm_scrs_2, "n c t -> n t c")

        return mil_vid_pred_1, frm_scrs_1, mil_vid_pred_2, frm_scrs_2
|
model/losses.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from platform import mac_ver
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
import torch
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
import torch.nn.functional as F
|
| 20 |
+
from einops import rearrange, reduce, repeat
|
| 21 |
+
from torch.autograd import Variable
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def kl_loss_compute(pred, soft_targets, reduce=True):
    """KL divergence KL(softmax(soft_targets) || softmax(pred)) from logits.

    Args:
        pred: (N, C) logits of the "student" distribution.
        soft_targets: (N, C) logits of the target distribution.
        reduce: if True, return the scalar mean over the batch; otherwise
            return a (N,) tensor of per-sample divergences.
    """
    kl = F.kl_div(
        F.log_softmax(pred, dim=1),
        F.softmax(soft_targets, dim=1),
        # Fix: `reduce=False` is a deprecated F.kl_div kwarg; the supported
        # spelling with identical element-wise behavior is reduction="none".
        reduction="none",
    )
    if reduce:
        return torch.mean(torch.sum(kl, dim=1))
    else:
        return torch.sum(kl, 1)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def mvl_loss(y_1, y_2, rate=0.2, weight=0.1):
    """Small-loss selection on the symmetric KL between two heads.

    Flattens both heads' per-frame scores, computes a weighted symmetric KL
    per frame, and averages only over the `rate` fraction of frames with the
    smallest divergence (co-teaching style agreement loss).
    """
    flat_1 = rearrange(y_1, "n t c -> (n t) c")
    flat_2 = rearrange(y_2, "n t c -> (n t) c")

    per_frame = weight * kl_loss_compute(
        flat_1, flat_2, reduce=False
    ) + weight * kl_loss_compute(flat_2, flat_1, reduce=False)

    # Selection happens outside the graph, on CPU.
    per_frame = per_frame.cpu().detach()

    order = torch.argsort(per_frame.data)
    sorted_losses = per_frame[order]

    # Keep only the lowest-divergence fraction of frames.
    n_keep = int(rate * len(sorted_losses))
    selected = order[:n_keep]

    return torch.mean(per_frame[selected])
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def cross_entropy_loss(outputs, soft_targets):
    """Soft-target cross entropy, skipping fully-ignored rows.

    Rows of `soft_targets` where every entry equals the ignore marker -100
    are dropped before computing -mean(sum(log_softmax(outputs) * targets)).
    """
    valid = (soft_targets != -100).sum(1) > 0
    kept_outputs = outputs[valid]
    kept_targets = soft_targets[valid]
    log_probs = F.log_softmax(kept_outputs, dim=1)
    return -torch.mean(torch.sum(log_probs * kept_targets, dim=1))
|
| 63 |
+
|
prepare/configs/action_label_split1.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"walk": 0,
|
| 3 |
+
"stand": 1,
|
| 4 |
+
"turn": 2,
|
| 5 |
+
"jump": 3
|
| 6 |
+
}
|
prepare/configs/action_label_split2.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sit": 0,
|
| 3 |
+
"run": 1,
|
| 4 |
+
"stand up": 2,
|
| 5 |
+
"kick": 3
|
| 6 |
+
}
|
prepare/configs/action_label_split3.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"jog": 0,
|
| 3 |
+
"wave": 1,
|
| 4 |
+
"dance": 2,
|
| 5 |
+
"gesture": 3
|
| 6 |
+
}
|
prepare/create_dataset.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapted from https://github.com/abhinanda-punnakkal/BABEL/ to frame-wise motion segmentation
|
| 2 |
+
#! /usr/bin/env python
|
| 3 |
+
# -*- coding: utf-8 -*-
|
| 4 |
+
# vim:fenc=utf-8
|
| 5 |
+
#
|
| 6 |
+
# Copyright © 2021 achandrasekaran <arjun.chandrasekaran@tuebingen.mpg.de>
|
| 7 |
+
#
|
| 8 |
+
# Distributed under terms of the MIT license.
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
import csv
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
import pdb
|
| 15 |
+
import pickle
|
| 16 |
+
import sys
|
| 17 |
+
from collections import *
|
| 18 |
+
from itertools import *
|
| 19 |
+
from os.path import basename as ospb
|
| 20 |
+
from os.path import dirname as ospd
|
| 21 |
+
from os.path import join as ospj
|
| 22 |
+
|
| 23 |
+
import dutils
|
| 24 |
+
import ipdb
|
| 25 |
+
import numpy as np
|
| 26 |
+
import pandas as pd
|
| 27 |
+
|
| 28 |
+
# Custom
|
| 29 |
+
import preprocess
|
| 30 |
+
import torch
|
| 31 |
+
import viz
|
| 32 |
+
from pandas.core.common import flatten
|
| 33 |
+
from tqdm import tqdm
|
| 34 |
+
|
| 35 |
+
"""
|
| 36 |
+
Script to load BABEL segments with NTU skeleton format and pre-process.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def ntu_style_preprocessing(b_dset_path):
    """Normalize BABEL samples NTU-RGBD-style and write them back to disk.

    Loads the pickled dataset at `b_dset_path`, normalizes each sample with
    preprocess.pre_normalization, drops samples with missing skeletons, and
    pickles the result next to the input (path derived by string replace).

    NOTE(review): this function returns None implicitly, although the
    module-level caller binds its result — verify that is intended.
    """
    print("Load BABEL v1.0 dataset subset", b_dset_path)
    b_dset = dutils.read_pkl(b_dset_path)

    X_new = []
    Y_new = []
    id_new = []
    for idx in range(len(b_dset["X"])):
        # Get unnormalized 5-sec. samples
        X = np.array(b_dset["X"][idx])
        print("X (old) = ", np.shape(X))  # T, V, C

        # Add a leading batch axis expected by pre_normalization.
        X = X[np.newaxis, :, :, :]

        # Prep. data for normalization
        X = X.transpose(0, 3, 1, 2)  # N, C, T, V
        X = X[:, :, :, :, np.newaxis]  # N, C, T, V, M
        print("Shape of prepped X: ", X.shape)

        # Normalize (pre-process) in NTU RGBD-style: align the spine bone to
        # the z-axis and the shoulder bone to the x-axis (joint index pairs).
        ntu_sk_spine_bone = np.array([0, 1])
        ntu_sk_shoulder_bone = np.array([8, 4])
        X, l_m_sk = preprocess.pre_normalization(
            X, zaxis=ntu_sk_spine_bone, xaxis=ntu_sk_shoulder_bone
        )

        # l_m_sk lists samples with missing skeletons; keep only clean ones.
        if len(l_m_sk) == 0:
            X_new.append(X[0])
            Y_new.append(b_dset["Y"][idx])
            id_new.append(b_dset["sid"][idx])
        else:
            print("Skipped")

    # Dataset w/ processed seg. chunks. (Skip samples w/ missing skeletons)
    b_AR_dset = {"sid": id_new, "X": X_new, "Y": Y_new}

    # Output path is derived from the input path by convention.
    fp = b_dset_path.replace("samples", "ntu_sk_ntu-style_preprocessed")
    # fp = '../data/babel_v1.0/babel_v1.0_ntu_sk_ntu-style_preprocessed.pkl'
    # dutils.write_pkl(b_AR_dset, fp)
    # protocol=4 supports large (>4GB) pickles.
    with open(fp, "wb") as of:
        pickle.dump(b_AR_dset, of, protocol=4)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def get_act_idx(y, act2idx, n_classes):
    """Map an action name to its index; unknown actions fall back to
    n_classes (the out-of-vocabulary bucket).
    """
    return act2idx.get(y, n_classes)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def store_splits_subsets(
    n_classes, spl, plus_extra=True, w_folder="../data_created/babel_v1.0/"
):
    """Filter one data split to the top `n_classes` actions and save
    features (.npy) and labels (.pkl) for the dataloader.

    Args:
        n_classes: keep only actions whose index is < n_classes.
        spl: split name ('train'/'val'/...), used in file names and to pick
            sample indices.
        plus_extra: selects the '..._extra_...' variants of input/output files.
        w_folder: directory holding the preprocessed datasets and outputs.
    """
    # Get splits
    splits = dutils.read_json("../data_created/amass_splits.json")
    # sequence id -> split name. NOTE: the comprehension's `spl` is local to
    # the comprehension (Python 3) and does not clobber the parameter.
    sid2split = {
        int(ospb(u).replace(".mp4", "")): spl for spl in splits for u in splits[spl]
    }

    # In labels, act. cat. --> idx; restrict to the first n_classes actions.
    act2idx_150 = dutils.read_json("../data_created/action_label_2_idx.json")
    act2idx = {k: act2idx_150[k] for k in act2idx_150 if act2idx_150[k] < n_classes}
    print("{0} actions in label set: {1}".format(len(act2idx), act2idx))

    if plus_extra:
        fp = w_folder + "babel_v1.0_" + spl + "_extra_ntu_sk_ntu-style_preprocessed.pkl"
    else:
        fp = w_folder + "babel_v1.0_" + spl + "_ntu_sk_ntu-style_preprocessed.pkl"

    # Get full dataset
    b_AR_dset = dutils.read_pkl(fp)

    # Store idxs of samples to include in learning
    split_idxs = defaultdict(list)
    for i, y1 in enumerate(b_AR_dset["Y1"]):

        # Check if action category in list of classes
        if y1 not in act2idx:
            continue

        sid = b_AR_dset["sid"][i]
        split_idxs[sid2split[sid]].append(i)  # Include idx in dataset

    # Save features that'll be loaded by dataloader
    ar_idxs = np.array(split_idxs[spl])
    X = b_AR_dset["X"][ar_idxs]
    if plus_extra:
        fn = w_folder + f"{spl}_extra_ntu_sk_{n_classes}.npy"
    else:
        fn = w_folder + f"{spl}_ntu_sk_{n_classes}.npy"
    np.save(fn, X)

    # labels: every non-feature key, restricted to the selected indices.
    labels = {k: np.array(b_AR_dset[k])[ar_idxs] for k in b_AR_dset if k != "X"}

    # Create, save label data structure that'll be loaded by dataloader
    label_idxs = defaultdict(list)
    for i, y1 in enumerate(labels["Y1"]):
        # y1: the single main action, mapped to its class index.
        label_idxs["Y1"].append(act2idx[y1])
        # yk: all whole-segment actions; unknowns map to n_classes.
        yk = [get_act_idx(y, act2idx, n_classes) for y in labels["Yk"][i]]
        label_idxs["Yk"].append(yk)
        # yov: per-action coverage fraction, keys remapped to class indices.
        yov_o = labels["Yov"][i]
        yov = {get_act_idx(y, act2idx, n_classes): yov_o[y] for y in yov_o}
        label_idxs["Yov"].append(yov)
        # Bookkeeping metadata carried through unchanged.
        label_idxs["seg_id"].append(labels["seg_id"][i])
        label_idxs["sid"].append(labels["sid"][i])
        label_idxs["chunk_n"].append(labels["chunk_n"][i])
        label_idxs["anntr_id"].append(labels["anntr_id"][i])

    if plus_extra:
        wr_f = w_folder + f"{spl}_extra_label_{n_classes}.pkl"
    else:
        wr_f = w_folder + f"{spl}_label_{n_classes}.pkl"
    # Saved as (seg_ids, (Y1, sid, chunk_n, anntr_id)) — the nested-tuple
    # layout the downstream feeder expects.
    dutils.write_pkl(
        (
            label_idxs["seg_id"],
            (
                label_idxs["Y1"],
                label_idxs["sid"],
                label_idxs["chunk_n"],
                label_idxs["anntr_id"],
            ),
        ),
        wr_f,
    )
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
class Babel_AR:
    """Object containing data, methods for Action Recognition.

    Task
    -----
    Given: x (Segment from Babel)
    Predict: \hat{p}(x) (Distribution over action categories)

    GT
    ---
    How to compute GT for a given segment?
    - yk: All action categories that are labeled for the entirety of segment
    - y1: One of yk
    - yov: Any y that belongs to part of a segment is considered to be GT.
    Fraction of segment covered by an action: {'walk': 1.0, 'wave': 0.5}

    """

    def __init__(self, dataset, dense=True):
        """Dataset with (samples, different GTs).

        Args:
            dataset: iterable of BABEL annotation dicts (one per sequence).
            dense: only dense (frame-level) annotations are supported.
        """
        # Load dataset
        self.babel = dataset
        self.dense = dense
        # Root folder for AMASS joint-position features.
        self.jpos_p = "dataset/amass"

        # Get frame-rate for each seq. in AMASS
        f_p = "dataset/BABEL/action_recognition/data/featp_2_fps.json"
        self.ft_p_2_fps = dutils.read_json(f_p)

        # Dataset w/ keys = {'X', 'Y1', 'Yk', 'Yov', 'seg_id', 'sid',
        # 'seg_dur'}
        self.d = defaultdict(list)
        for ann in tqdm(self.babel):
            self._update_dataset(ann)

    def _subsample_to_30fps(self, orig_ft, orig_fps):
        """Get features at 30fps frame-rate
        Args:
            orig_ft <array> (T, 25*3): Feats. @ `orig_fps` frame-rate
            orig_fps <float>: Frame-rate in original (ft) seq.
        Return:
            ft <array> (T', 25*3): Feats. @ 30fps
        """
        T, n_j, _ = orig_ft.shape
        out_fps = 30.0
        # Matching the sub-sampling used for rendering
        if int(orig_fps) % int(out_fps):
            # Non-integer ratio: pick 30 evenly spread frames per second.
            sel_fr = np.floor(orig_fps / out_fps * np.arange(int(out_fps))).astype(int)
            n_duration = int(T / int(orig_fps))
            t_idxs = []
            for i in range(n_duration):
                t_idxs += list(i * int(orig_fps) + sel_fr)
            # Handle the trailing partial second, keeping indices in range.
            if int(T % int(orig_fps)):
                last_sec_frame_idx = n_duration * int(orig_fps)
                t_idxs += [
                    x + last_sec_frame_idx for x in sel_fr if x + last_sec_frame_idx < T
                ]
        else:
            # Integer ratio: plain strided subsampling.
            t_idxs = np.arange(0, T, orig_fps / out_fps, dtype=int)

        ft = orig_ft[t_idxs, :, :]
        return ft

    def _viz_x(self, ft, fn="test_sample"):
        """Wrapper to visualize the given sample (w/ NTU RGBD skeleton)."""
        viz.viz_seq(seq=ft, folder_p=f"test_viz/{fn}", sk_type="nturgbd", debug=True)
        return None

    def _load_seq_feats(self, ft_p, sk_type):
        """Given path to joint position features, return them in 30fps"""
        # Identify appropriate feature directory path on disk
        if "smpl_wo_hands" == sk_type:  # SMPL w/o hands (T, 22*3)
            jpos_p = ospj(self.jpos_p, "joint_pos")
        if "nturgbd" == sk_type:  # NTU (T, 219)
            jpos_p = ospj(self.jpos_p, "babel_joint_pos")

        # Get the correct dataset folder name
        ddir_n = ospb(ospd(ospd(ft_p)))
        # A couple of AMASS sub-datasets were renamed on disk.
        ddir_map = {"BioMotionLab_NTroje": "BMLrub", "DFaust_67": "DFaust"}
        ddir_n = ddir_map[ddir_n] if ddir_n in ddir_map else ddir_n
        # Get the subject folder name
        sub_fol_n = ospb(ospd(ft_p))

        # Sanity check
        fft_p = ospj(jpos_p, ddir_n, sub_fol_n, ospb(ft_p))
        assert os.path.exists(fft_p)

        # Load seq. fts.
        ft = np.load(fft_p)["joint_pos"]
        T, ft_sz = ft.shape

        # Get NTU skeleton joints: select the SMPL-H joints that map onto
        # the 25-joint NTU skeleton.
        ntu_js = dutils.smpl_to_nturgbd(model_type="smplh", out_format="nturgbd")
        ft = ft.reshape(T, -1, 3)
        ft = ft[:, ntu_js, :]

        # Sub-sample to 30fps
        orig_fps = self.ft_p_2_fps[ft_p]
        ft = self._subsample_to_30fps(ft, orig_fps)
        # print(f'Feat. shape = {ft.shape}, fps = {orig_fps}')
        # if orig_fps != 30.0:
        #     self._viz_x(ft)
        return ft

    def _get_per_f_labels(self, ann, ann_type, seq_dur):
        """Per-frame labels: {0: ['walk'], 1: ['walk', 'wave'], ... T: ['stand']}.

        Frames with no overlapping segment get no entry (defaultdict),
        so len(yf) counts labeled frames only.
        """
        yf = defaultdict(list)
        T = int(30.0 * seq_dur)  # label timeline at 30fps
        for n_f in range(T):
            cur_t = float(n_f / 30.0)
            for seg in ann["labels"]:

                if seg["act_cat"] is None:
                    continue

                # Sequence-level annotations span the whole sequence.
                if "seq_ann" == ann_type:
                    seg["start_t"] = 0.0
                    seg["end_t"] = seq_dur

                # Frame falls inside [start_t, end_t) of this segment.
                if cur_t >= float(seg["start_t"]) and cur_t < float(seg["end_t"]):
                    yf[n_f] += seg["act_cat"]
        return yf

    def _compute_dur_samples(self, id, ann, ann_type, seq_ft, seq_dur, dur=5.0):
        """Return each motion and its frame-wise GT action

        Return:
            [ { 'seg_id': motion id,
                'x': motion feats,
                'yall': labels of each motion,
                { ... }, ...
            ]
        """
        yf = self._get_per_f_labels(ann, ann_type, seq_dur)

        # Truncate features to the label timeline length.
        # NOTE(review): len(yf) counts only frames that received a label
        # (defaultdict) — confirm that unlabeled frames never occur here.
        seq_ft = seq_ft[:len(yf)]
        assert seq_ft.shape[0] == len(yf)

        seq_samples = []
        seq_samples.append(
            {"seg_id": id, "x": seq_ft, "y": yf,}
        )

        return seq_samples

    def _sample_at_seg_chunk_level(self, ann, seq_samples):
        # Samples at segment-chunk-level: append each sample's fields to the
        # accumulated dataset dict.
        for i, sample in enumerate(seq_samples):

            self.d["sid"].append(ann["babel_sid"])  # Seq. info
            self.d["X"].append(sample["x"])  # motion feats.
            self.d["Y"].append(sample["y"])  # labels of each motion.
        return

    def _update_dataset(self, ann):
        """Add one annotated sequence to the dataset (X, Y, sid)."""

        # Get feats. for seq.
        seq_ft = self._load_seq_feats(ann["feat_p"], "nturgbd")

        # To keep track of type of annotation for loading 'extra'
        # Compute all GT labels for this seq.
        seq_samples = None
        if self.dense:
            # Only frame-level ("dense") annotations are handled.
            if ann["frame_ann"] is not None:
                ann_ar = ann["frame_ann"]
                seq_samples = self._compute_dur_samples(
                    ann["babel_sid"], ann_ar, "frame_ann", seq_ft, ann["dur"]
                )
                self._sample_at_seg_chunk_level(ann, seq_samples)
            else:
                print("not supported data")

        else:
            raise NotImplementedError

        return
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
# Create dataset
# --------------------------
# Module-level driver: builds the dense BABEL dataset for each split, pickles
# the raw samples, then runs NTU-style preprocessing on the pickle.
d_folder = "dataset/babel_v1.0_release/"
w_folder = "dataset/babel_v1.0_sequence/"
os.makedirs(w_folder, exist_ok=True)
for spl in ["train", "val"]:
    # Load Dense BABEL
    data = dutils.read_json(ospj(d_folder, f"{spl}.json"))
    dataset = [data[sid] for sid in data]
    dense_babel = Babel_AR(dataset, dense=True)
    # Store Dense BABEL
    d_filename = w_folder + "babel_v1.0_" + spl + "_samples.pkl"
    dutils.write_pkl(dense_babel.d, d_filename)

    # Pre-process, Store data in dataset
    print("NTU-style preprocessing")
    # NOTE(review): ntu_style_preprocessing writes its output to disk and
    # returns None, so this binding is always None.
    babel_dataset_AR = ntu_style_preprocessing(d_filename)
|
prepare/dutils.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
# vim:fenc=utf-8
|
| 4 |
+
#
|
| 5 |
+
# Copyright © 2021 achandrasekaran <arjun.chandrasekaran@tuebingen.mpg.de>
|
| 6 |
+
#
|
| 7 |
+
# Distributed under terms of the MIT license.
|
| 8 |
+
|
| 9 |
+
import csv
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
import os.path as osp
|
| 13 |
+
import pdb
|
| 14 |
+
import pickle
|
| 15 |
+
import sys
|
| 16 |
+
from collections import Counter
|
| 17 |
+
from os.path import basename as ospb
|
| 18 |
+
from os.path import dirname as ospd
|
| 19 |
+
from os.path import join as ospj
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
import torch
|
| 23 |
+
import viz
|
| 24 |
+
from smplx import SMPLH
|
| 25 |
+
from tqdm import tqdm
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def read_json(json_filename):
    """Return the parsed contents of a JSON file."""
    with open(json_filename) as fh:
        return json.load(fh)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def read_pkl(pkl_filename):
    """Return the unpickled contents of a pickle file."""
    with open(pkl_filename, "rb") as fh:
        return pickle.load(fh)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def write_json(contents, filename):
    """Serialize `contents` to `filename` as indented JSON text."""
    with open(filename, "w") as fh:
        fh.write(json.dumps(contents, indent=2))
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def write_pkl(contents, filename):
    """Pickle `contents` into `filename` (binary mode)."""
    with open(filename, "wb") as fh:
        pickle.dump(contents, fh)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def smpl_to_nturgbd(model_type="smplh", out_format="nturgbd"):
    """Index map from SMPL-H joints to the 25-joint NTU-RGBD skeleton.

    (Borrowed from
    https://gitlab.tuebingen.mpg.de/apunnakkal/2s_agcn/-/blob/master/data_gen/smpl_data_utils.py)

    NTU joint order produced:
    1-base of the spine, 2-middle of the spine, 3-neck, 4-head,
    5-left shoulder, 6-left elbow, 7-left wrist, 8-left hand,
    9-right shoulder, 10-right elbow, 11-right wrist, 12-right hand,
    13-left hip, 14-left knee, 15-left ankle, 16-left foot,
    17-right hip, 18-right knee, 19-right ankle, 20-right foot,
    21-spine, 22-tip of the left hand, 23-left thumb,
    24-tip of the right hand, 25-right thumb.

    :param model_type: source skeleton type ('smplh' supported).
    :param out_format: target skeleton type ('nturgbd' supported).
    :return: int32 array of 25 SMPL-H joint indices, or None for
        unsupported combinations.
    """
    if model_type != "smplh" or out_format != "nturgbd":
        return None
    # 22 and 37 approximate the hands (base of the index finger).
    mapping = [
        0, 3, 12, 15,              # spine base/mid, neck, head
        16, 18, 20, 22,            # left arm + left hand
        17, 19, 21, 37,            # right arm + right hand
        1, 4, 7, 10,               # left leg
        2, 5, 8, 11,               # right leg
        9,                         # spine
        63, 64, 68, 69,            # hand tips / thumbs
    ]
    return np.array(mapping, dtype=np.int32)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class dotdict(dict):
    """Dictionary subclass allowing dot-notation access to its keys.

    Missing attributes resolve to None (dict.get semantics) instead of
    raising AttributeError; deleting a missing attribute raises KeyError.
    """

    def __getattr__(self, name):
        # Only invoked when normal attribute lookup fails; mirrors dict.get.
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def store_counts(label_fp):
    """Compute # samples per class from stored labels and save the tally.

    Args:
        label_fp <str>: Path to the pickled label file.

    Writes (next to the label file):
        <stem>_count.pkl <Counter>: class idx -> sample count.
    """
    label_tup = read_pkl(label_fp)
    # Class indices are stored at position [1][0] of the pickled tuple.
    idxs = label_tup[1][0]
    print("# Samples in set = ", len(idxs))

    counts = Counter(idxs)
    print("File ", label_fp, "len", len(counts))

    write_pkl(counts, label_fp.replace(".pkl", "_count.pkl"))
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def load_babel_dataset(d_folder="dataset/babel_v1.0_release"):
    """Load all BABEL annotation files from `d_folder`.

    Args:
        d_folder <str>: Folder containing the BABEL v1.0 release JSONs.

    Returns:
        babel <dict>: split name (e.g. 'train', 'extra_val') -> parsed JSON.
    """
    # Data folder
    l_babel_dense_files = ["train", "val", "test"]
    l_babel_extra_files = ["extra_train", "extra_val"]

    # BABEL Dataset
    babel = {}
    for fn in l_babel_dense_files + l_babel_extra_files:
        # Use a context manager so the file handle is closed promptly;
        # the original json.load(open(...)) leaked one descriptor per file.
        with open(ospj(d_folder, fn + ".json")) as infile:
            babel[fn] = json.load(infile)

    return babel
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def store_seq_fps(amass_p):
    """Get fps for each seq. in BABEL.

    Arguments:
    ---------
    amass_p <str>: Path where you download AMASS to.

    Save:
    -----
    featp_2_fps.json <dict>: Key: feat path <str>, value: orig. fps
    in AMASS <float>. E.g.,: {'KIT/KIT/4/RightTurn01_poses.npz': 100.0, ...}
    """
    # Get BABEL dataset
    babel = load_babel_dataset()

    # Loop over each BABEL seq, store frame-rate
    ft_p_2_fps = {}
    for fn in babel:
        for sid in tqdm(babel[fn]):
            ann = babel[fn][sid]
            # print (ann)
            if ann["feat_p"] not in ft_p_2_fps:
                ddir_n = ann["feat_p"]
                ddir_n_ = ddir_n.split("/")
                # Drop the leading path component of BABEL's feat_p before
                # joining with the local AMASS root.
                ddir_n_ = ddir_n_[1:]

                # BABEL uses short names for two datasets that AMASS ships
                # under longer folder names.
                ddir_map = {"BMLrub": "BioMotionLab_NTroje", "DFaust": "DFaust_67"}
                ddir_n_[0] = (
                    ddir_map[ddir_n_[0]] if ddir_n_[0] in ddir_map else ddir_n_[0]
                )

                p = ospj(amass_p, "/".join(ddir_n_))

                # Original capture frame-rate stored in the AMASS npz.
                fps = np.load(p)["mocap_framerate"]
                ft_p_2_fps[ann["feat_p"]] = float(fps)
    # NOTE(review): output path is hard-coded relative to the CWD.
    dest_fp = "dataset/featp_2_fps.json"
    write_json(ft_p_2_fps, dest_fp)
    return None
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def store_ntu_jpos(smplh_model_p, dest_jpos_p, amass_p):
    """Store SMPL-H joint positions for later NTU-RGBD skeleton extraction.

    Args:
        smplh_model_p <str>: Path to the male SMPL-H model pickle.
        dest_jpos_p <str>: Root folder for per-sequence npz outputs.
        amass_p <str>: Root folder of the downloaded AMASS datasets.

    For every BABEL sequence not yet present under dest_jpos_p, forward-passes
    the AMASS pose parameters through SMPL-H one frame at a time and saves
    the flattened joint positions under key 'joint_pos'.
    """
    # Model to forward-pass through, to store joint positions
    smplh = SMPLH(
        smplh_model_p,
        create_transl=False,
        ext="pkl",
        gender="male",
        use_pca=False,
        batch_size=1,
    )

    # Load paths to all BABEL features
    featp_2_fps = read_json("dataset/featp_2_fps.json")

    # Loop over all BABEL data, verify that joint positions are stored on disk
    l_m_ft_p = []
    for ft_p in featp_2_fps:

        # Get the correct dataset folder name
        ddir_n = ospb(ospd(ospd(ft_p)))
        # AMASS long folder names -> BABEL short names.
        ddir_map = {"BioMotionLab_NTroje": "BMLrub", "DFaust_67": "DFaust"}
        ddir_n = ddir_map[ddir_n] if ddir_n in ddir_map else ddir_n
        # Get the subject folder name
        sub_fol_n = ospb(ospd(ft_p))

        # Sanity check: collect (source, destination) pairs still missing.
        fft_p = ospj(dest_jpos_p, ddir_n, sub_fol_n, ospb(ft_p))
        if not os.path.exists(fft_p):
            l_m_ft_p.append((ft_p, fft_p))
    print("Total # missing NTU RGBD skeleton features = ", len(l_m_ft_p))

    # Loop over missing joint positions and store them on disk
    for i, (ft_p, ntu_jpos_p) in enumerate(tqdm(l_m_ft_p)):
        # Strip the leading dataset-root component before joining to amass_p.
        ft_p_ = ft_p.split("/")
        ft_p_ = ft_p_[1:]

        ft_p = ospj(amass_p, "/".join(ft_p_))

        jrot_smplh = np.load(ft_p)["poses"]
        # Break joints down into body parts (axis-angle params:
        # [0:3] root orient, [3:66] body, [66:111] left hand, [111:] right hand)
        smpl_body_jrot = jrot_smplh[:, 3:66]
        left_hand_jrot = jrot_smplh[:, 66:111]
        right_hand_jrot = jrot_smplh[:, 111:]
        root_orient = jrot_smplh[:, 0:3].reshape(-1, 3)

        # Forward through model to get a superset of required joints
        T = jrot_smplh.shape[0]
        # 219 = 73 joints x 3 coords — presumably what this SMPLH config
        # returns; TODO confirm against the smplx output.
        ntu_jpos = np.zeros((T, 219))
        for t in range(T):
            res = smplh(
                body_pose=torch.Tensor(smpl_body_jrot[t : t + 1, :]),
                global_orient=torch.Tensor(root_orient[t : t + 1, :]),
                left_hand_pose=torch.Tensor(left_hand_jrot[t : t + 1, :]),
                right_hand_pose=torch.Tensor(right_hand_jrot[t : t + 1, :]),
                # transl=torch.Tensor(transl)
            )
            jpos = res.joints.detach().cpu().numpy()[:, :, :].reshape(-1)
            ntu_jpos[t, :] = jpos

        # Save to disk
        if not os.path.exists(ospd(ntu_jpos_p)):
            os.makedirs(ospd(ntu_jpos_p))
        np.savez(ntu_jpos_p, joint_pos=ntu_jpos, allow_pickle=True)

    return
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def viz_ntu_jpos(jpos_p, l_ft_p):
    """Visualize sequences of NTU-skeleton joint positions.

    Args:
        jpos_p <str>: Root folder of stored joint-position npz files.
        l_ft_p <list of str>: Relative npz paths (one per sequence) to render.

    Renders each sequence to test_viz/test_ntu_w_axis via viz.viz_seq.
    """
    # Indices that are in the NTU RGBD skeleton
    smpl2nturgbd = smpl_to_nturgbd()
    # Iterate over each
    for ft_p in l_ft_p:
        x = np.load(ospj(jpos_p, ft_p))["joint_pos"]
        T, ft_sz = x.shape
        # Unflatten (T, J*3) back into per-joint xyz triples.
        x = x.reshape(T, ft_sz // 3, 3)
        # print('Data shape = {0}'.format(x.shape))
        # Keep only the 25 joints of the NTU skeleton.
        x = x[:, smpl2nturgbd, :]
        # print('Data shape = {0}'.format(x.shape))
        # x = x[:,:,:, 0].transpose(1, 2, 0) # (3, 150, 22, 1) --> (150, 22, 3)
        print("Data shape = {0}".format(x.shape))
        viz.viz_seq(
            seq=x, folder_p="test_viz/test_ntu_w_axis", sk_type="nturgbd", debug=True
        )
        print("-" * 50)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def main():
    """Run the preliminary data-preparation pipeline.

    1. Record each AMASS sequence's original frame-rate.
    2. Cache SMPL-H joint positions for NTU-RGBD skeleton extraction.
    3. Visualize one stored sequence as a sanity check.
    """
    amass_p = "dataset/amass/"

    # Save feature paths --> fps (released in babel/action_recognition/data/)
    store_seq_fps(amass_p)

    # Save joint positions in NTU-RGBD skeleton format
    smplh_model_p = "./human_model/SMPLH_male.pkl"
    # model is generated by https://github.com/vchoutas/smplx
    jpos_p = "./dataset/amass/babel_joint_pos"
    store_ntu_jpos(smplh_model_p, jpos_p, amass_p)

    # Viz. saved seqs.
    l_ft_p = ["ACCAD/Male2MartialArtsStances_c3d/D7 - walk to bow_poses.npz"]
    viz_ntu_jpos(jpos_p, l_ft_p)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
if __name__ == "__main__":
    # Run the data-preparation pipeline when invoked as a script.
    main()
|
prepare/generate_dataset.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# End-to-end dataset generation: cache fps + joint positions, build the
# frame-level dataset, then label split 1 (splits 2/3 are optional).
python prepare/dutils.py
python prepare/create_dataset.py
python prepare/split_dataset.py --split 1
# python prepare/split_dataset.py --split 2
# python prepare/split_dataset.py --split 3
|
prepare/preprocess.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
|
| 3 |
+
from rotation import *
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]):
    """Normalize skeleton sequences NTU-style: pad empty frames, center on
    the spine joint, then align the body to the z- and x-axes.

    Args:
        data: array of shape (N, C, T, V, M) — samples, coords, frames,
            joints, persons. Mutated in place through the transposed view `s`.
        zaxis: joint index pair (bottom, top) whose bone is rotated onto the
            z-axis (hip -> spine for NTU).
        xaxis: joint index pair (right, left shoulder) rotated onto the x-axis.

    Returns:
        (data, l_m_sk): the normalized array and the indices of samples that
        contained no skeleton at all.

    NOTE(review): the list defaults are never mutated, so the shared
    mutable-default pitfall does not bite here — but worth keeping in mind.
    """
    N, C, T, V, M = data.shape
    s = np.transpose(data, [0, 4, 2, 3, 1])  # N, C, T, V, M to N, M, T, V, C
    l_m_sk = []  # List idxs of missing skeletons

    print("pad the null frames with the previous frames")
    for i_s, skeleton in enumerate(tqdm(s)):  # pad
        if skeleton.sum() == 0:
            print(i_s, " has no skeleton")
            l_m_sk.append(i_s)
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            if person[0].sum() == 0:
                # Shift all non-empty frames to the front of the sequence.
                index = person.sum(-1).sum(-1) != 0
                tmp = person[index].copy()
                person *= 0
                person[: len(tmp)] = tmp
            for i_f, frame in enumerate(person):
                if frame.sum() == 0:
                    if person[i_f:].sum() == 0:
                        # All remaining frames are empty: tile the leading
                        # non-empty prefix to fill the tail, then stop.
                        rest = len(person) - i_f
                        num = int(np.ceil(rest / i_f))
                        pad = np.concatenate([person[0:i_f] for _ in range(num)], 0)[
                            :rest
                        ]
                        s[i_s, i_p, i_f:] = pad
                        break

    print("sub the center joint #1 (spine joint in ntu and neck joint in kinetics)")
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        # Center of the FIRST person is subtracted from every person.
        main_body_center = skeleton[0][:, 1:2, :].copy()
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            # Mask keeps genuinely-empty joints at zero after centering.
            mask = (person.sum(-1) != 0).reshape(T, V, 1)
            s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask

    print(
        "parallel the bone between hip(jpt 0) and spine(jpt 1) of the first person to the z axis"
    )
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        # Rotation computed from the first person's FIRST frame, applied to all.
        joint_bottom = skeleton[0, 0, zaxis[0]]
        joint_top = skeleton[0, 0, zaxis[1]]
        axis = np.cross(joint_top - joint_bottom, [0, 0, 1])
        angle = angle_between(joint_top - joint_bottom, [0, 0, 1])
        matrix_z = rotation_matrix(axis, angle)
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            for i_f, frame in enumerate(person):
                if frame.sum() == 0:
                    continue
                for i_j, joint in enumerate(frame):
                    s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint)

    print(
        "parallel the bone between right shoulder(jpt 8) and left shoulder(jpt 4) of the first person to the x axis"
    )
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        joint_rshoulder = skeleton[0, 0, xaxis[0]]
        joint_lshoulder = skeleton[0, 0, xaxis[1]]
        axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0])
        angle = angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0])
        matrix_x = rotation_matrix(axis, angle)
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            for i_f, frame in enumerate(person):
                if frame.sum() == 0:
                    continue
                for i_j, joint in enumerate(frame):
                    s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint)

    # `s` is a view of `data`, so `data` already holds the normalized values;
    # this transpose just restores the (N, C, T, V, M) layout.
    data = np.transpose(s, [0, 4, 2, 3, 1])
    return data, l_m_sk
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
    # Smoke test on a pre-generated NTU cross-view validation split.
    data = np.load("../data/ntu/xview/val_data.npy")
    # NOTE(review): the return value is discarded; this still works because
    # pre_normalization writes through a transposed *view* of `data`.
    pre_normalization(data)
    np.save("../data/ntu/xview/data_val_pre.npy", data)
|
prepare/rotation.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
# vim:fenc=utf-8
|
| 4 |
+
#
|
| 5 |
+
# Copyright © 2021 achandrasekaran <arjun.chandrasekaran@tuebingen.mpg.de>
|
| 6 |
+
#
|
| 7 |
+
# Distributed under terms of the MIT license.
|
| 8 |
+
|
| 9 |
+
import math
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def rotation_matrix(axis, theta):
    """Return the 3x3 matrix for a counterclockwise rotation by `theta`
    radians about `axis` (quaternion / Rodrigues formulation).

    A degenerate (near-zero) axis or angle yields the identity matrix.
    """
    if np.abs(axis).sum() < 1e-6 or np.abs(theta) < 1e-6:
        return np.eye(3)
    ax = np.asarray(axis)
    ax = ax / math.sqrt(np.dot(ax, ax))
    # Quaternion components (w, x, y, z) of the rotation.
    w = math.cos(theta / 2.0)
    x, y, z = -ax * math.sin(theta / 2.0)
    return np.array(
        [
            [w * w + x * x - y * y - z * z, 2 * (x * y + w * z), 2 * (x * z - w * y)],
            [2 * (x * y - w * z), w * w + y * y - x * x - z * z, 2 * (y * z + w * x)],
            [2 * (x * z + w * y), 2 * (y * z - w * x), w * w + z * z - x * x - y * y],
        ]
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def unit_vector(vector):
    """Return `vector` scaled to unit Euclidean length."""
    norm = np.linalg.norm(vector)
    return vector / norm
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def angle_between(v1, v2):
    """Return the angle in radians between vectors `v1` and `v2`.

    Degenerate (near-zero) inputs return 0.

    >>> angle_between((1, 0, 0), (0, 1, 0))
    1.5707963267948966
    >>> angle_between((1, 0, 0), (1, 0, 0))
    0.0
    >>> angle_between((1, 0, 0), (-1, 0, 0))
    3.141592653589793
    """
    if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6:
        return 0
    # Normalize both inputs, then clamp the cosine to dodge fp overshoot.
    v1_u = np.asarray(v1) / np.linalg.norm(v1)
    v2_u = np.asarray(v2) / np.linalg.norm(v2)
    cos_sim = np.dot(v1_u, v2_u)
    return np.arccos(np.clip(cos_sim, -1.0, 1.0))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def x_rotation(vector, theta):
    """Rotate a 3-D vector counterclockwise by `theta` radians about the x-axis."""
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array(
        [
            [1, 0, 0],
            [0, c, -s],
            [0, s, c],
        ]
    )
    return np.dot(rot, vector)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def y_rotation(vector, theta):
    """Rotate a 3-D vector counterclockwise by `theta` radians about the y-axis."""
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array(
        [
            [c, 0, s],
            [0, 1, 0],
            [-s, 0, c],
        ]
    )
    return np.dot(rot, vector)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def z_rotation(vector, theta):
    """Rotate a 3-D vector counterclockwise by `theta` radians about the z-axis."""
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array(
        [
            [c, -s, 0],
            [s, c, 0],
            [0, 0, 1],
        ]
    )
    return np.dot(rot, vector)
|
prepare/split_dataset.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import dutils
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from collections import Counter
|
| 18 |
+
from tqdm import tqdm
|
| 19 |
+
import os
|
| 20 |
+
from pandas.core.common import flatten
|
| 21 |
+
import argparse
|
| 22 |
+
|
| 23 |
+
# Sequences with more than MAX_LEN labeled frames are discarded.
MAX_LEN = 1000
# Number of target action classes; the value N_CLASS itself is used as the
# per-frame "no target action" (background) label.
N_CLASS = 4

parser = argparse.ArgumentParser(
    description="Spatial Temporal Graph Convolution Network"
)
parser.add_argument(
    "--data-root",
    default="dataset/babel_v1.0_sequence/",
    help="the root path of the dataset",
    type=str
)
parser.add_argument(
    "--split",
    default=1,
    help="the split of the dataset",
    type=int
)
parser.add_argument(
    "--output-folder",
    default="dataset/processed_data",
    help="the output folder of the generated data",
    type=str
)
# NOTE(review): arguments are parsed at import time, so importing this module
# from elsewhere will consume sys.argv.
args = parser.parse_args()

os.makedirs(args.output_folder, exist_ok=True)
|
| 50 |
+
|
| 51 |
+
def main(data_root):
    """Load preprocessed BABEL pickles and write labeled train/val splits.

    Args:
        data_root <str>: Folder holding the *_preprocessed.pkl files.
    """
    train_data = dutils.read_pkl(os.path.join(data_root, "babel_v1.0_train_ntu_sk_ntu-style_preprocessed.pkl"))
    # BABEL's val split serves as the held-out set here.
    test_data = dutils.read_pkl(os.path.join(data_root, "babel_v1.0_val_ntu_sk_ntu-style_preprocessed.pkl"))

    # Action-name -> class-index map for the chosen split.
    act2idx = dutils.read_json(f"./prepare/configs/action_label_split{args.split}.json")

    label_train_data(data_root, train_data, act2idx)
    label_val_data(data_root, test_data, act2idx)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def label_train_data(data_root, train_data, act2idx):
    """Build per-frame class labels for the training set and save a pickle.

    Args:
        data_root <str>: Dataset root (unused; kept for signature symmetry).
        train_data <dict>: Keys 'sid', 'X' (features, time on axis 1) and
            'Y' (per-sequence dict of frame -> label list).
        act2idx <dict>: action-label string -> class index.

    Writes train_split{split}.pkl with keys:
        sid: sequence ids; X: features truncated to the labeled length;
        Y: unique class indices present per sequence; L: per-frame class
        indices, with N_CLASS marking frames without a target action.
    """
    sid = []
    x = []
    y = []
    loc = []

    for i, seq_labels in enumerate(tqdm(train_data["Y"])):
        # Drop over-long sequences outright.
        if len(seq_labels) > MAX_LEN:
            continue

        y_ = []
        loc_ = []
        flag = False  # becomes True once any frame carries a target action

        for frame, labels in seq_labels.items():
            # Keep only labels belonging to this split's action set.
            label_set = set(labels) & set(act2idx.keys())
            label_list = list(label_set)
            if len(label_list) > 0:
                flag = True
                # NOTE(review): when several target actions co-occur on a
                # frame, an arbitrary one wins (set ordering) — confirm intended.
                loc_.append(act2idx[label_list[0]])
                y_.append(act2idx[label_list[0]])
            else:
                loc_.append(N_CLASS)  # background frame

        max_t = len(loc_)
        loc_ = np.array(loc_)
        y_ = list(set(y_))

        if flag:

            # print (train_data["X"][i].shape, len(loc_))
            loc.append(loc_)
            sid.append(train_data["sid"][i])
            # Truncate features to the number of labeled frames.
            x.append(train_data["X"][i][:,:max_t,...])
            y.append(y_)

    data = {"sid": sid, "X": x, "Y": y, "L":loc}

    dutils.write_pkl(data, os.path.join(args.output_folder, f"train_split{args.split}.pkl"))
    print (f"#Train sequence: {len(x)}")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def label_val_data(data_root, test_data, act2idx):
    """Build per-frame class labels for the validation set and save a pickle.

    Mirrors label_train_data (same filtering and truncation rules) but reads
    from test_data and writes val_split{split}.pkl.

    Args:
        data_root <str>: Dataset root (unused; kept for signature symmetry).
        test_data <dict>: Keys 'sid', 'X' and 'Y' as in label_train_data.
        act2idx <dict>: action-label string -> class index.
    """
    sid = []
    x = []
    y = []
    loc = []
    for i, seq_labels in enumerate(tqdm(test_data["Y"])):
        # Drop over-long sequences outright.
        if len(seq_labels) > MAX_LEN:
            continue

        y_ = []
        loc_ = []
        flag = False  # becomes True once any frame carries a target action

        for frame, labels in seq_labels.items():
            label_set = set(labels) & set(act2idx.keys())
            label_list = list(label_set)
            if len(label_list) > 0:
                flag = True
                # NOTE(review): arbitrary pick among co-occurring target
                # actions (set ordering) — confirm intended.
                loc_.append(act2idx[label_list[0]])
                y_.append(act2idx[label_list[0]])
            else:
                loc_.append(N_CLASS)  # background frame

        max_t = len(loc_)
        loc_ = np.array(loc_)
        y_ = list(set(y_))

        if flag:
            loc.append(loc_)
            sid.append(test_data["sid"][i])
            # Truncate features to the number of labeled frames.
            x.append(test_data["X"][i][:,:max_t,...])
            y.append(y_)

    data = {"sid": sid, "X": x, "Y": y, "L":loc}

    dutils.write_pkl(data, os.path.join(args.output_folder, f"val_split{args.split}.pkl"))
    print (f"#Test sequence: {len(x)}")
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
    # args.data_root comes from the module-level argparse.
    main(args.data_root)
|
prepare/viz.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
# vim:fenc=utf-8
|
| 4 |
+
#
|
| 5 |
+
# Copyright © 2020 achandrasekaran <arjun.chandrasekaran@tuebingen.mpg.de>
|
| 6 |
+
#
|
| 7 |
+
# Distributed under terms of the MIT license.
|
| 8 |
+
|
| 9 |
+
import math
|
| 10 |
+
import os
|
| 11 |
+
import os.path as osp
|
| 12 |
+
import pdb
|
| 13 |
+
import random
|
| 14 |
+
import shutil
|
| 15 |
+
import subprocess
|
| 16 |
+
import sys
|
| 17 |
+
import uuid
|
| 18 |
+
|
| 19 |
+
import cv2
|
| 20 |
+
import dutils
|
| 21 |
+
import numpy as np
|
| 22 |
+
import torch
|
| 23 |
+
from matplotlib import pyplot as plt
|
| 24 |
+
from mpl_toolkits.mplot3d import Axes3D
|
| 25 |
+
from torch.nn.functional import interpolate as intrp
|
| 26 |
+
|
| 27 |
+
"""
|
| 28 |
+
Visualize input and output motion sequences and labels
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def get_smpl_skeleton():
    """Bone list (joint-index pairs) for the SMPL body, traversed in order:
    left lower, left upper, spine, neck/head, right lower, right upper.
    """
    bones = [
        (0, 1), (1, 4), (4, 7), (7, 10),            # left leg
        (9, 13), (13, 16), (16, 18), (18, 20),      # left arm  # (20, 22) omitted
        (0, 3), (3, 6), (6, 9), (9, 12), (12, 15),  # spinal column
        (0, 2), (2, 5), (5, 8), (8, 11),            # right leg
        (9, 14), (14, 17), (17, 19), (19, 21),      # right arm  # (21, 23) omitted
    ]
    return np.array(bones)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_nturgbd_joint_names():
    """Names of the 25 NTU-RGBD joints, indexed 0-24, using SMPL-style naming.

    From the NTU paper's numbering (1-based): 1-base of the spine, 2-middle
    of the spine, 3-neck, 4-head, 5..8 left arm, 9..12 right arm, 13..16
    left leg, 17..20 right leg, 21-spine, 22-left hand tip, 23-left thumb,
    24-right hand tip, 25-right thumb.
    """
    return [
        "Pelvis",      # 0
        "Spine1",      # 1
        "Neck",        # 2
        "Head",        # 3
        "L_Shoulder",  # 4
        "L_Elbow",     # 5
        "L_Wrist",     # 6
        "L_Hand",      # 7
        "R_Shoulder",  # 8
        "R_Elbow",     # 9
        "R_Wrist",     # 10
        "R_Hand",      # 11
        "L_Hip",       # 12
        "L_Knee",      # 13
        "L_Ankle",     # 14
        "L_Foot",      # 15
        "R_Hip",       # 16
        "R_Knee",      # 17
        "R_Ankle",     # 18
        "R_Foot",      # 19
        "Spine3",      # 20
        "L_HandTip",   # 21 (not in SMPL)
        "L_Thumb",     # 22 (not in SMPL)
        "R_HandTip",   # 23 (not in SMPL)
        "R_Thumb",     # 24 (not in SMPL)
    ]
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def get_smpl_joint_names():
    """First 22 SMPL joint names (Wiki ordering); hand joints 22/23 excluded."""
    return [
        "Pelvis",      # 0
        "L_Hip",       # 1
        "R_Hip",       # 2
        "Spine1",      # 3
        "L_Knee",      # 4
        "R_Knee",      # 5
        "Spine2",      # 6
        "L_Ankle",     # 7
        "R_Ankle",     # 8
        "Spine3",      # 9
        "L_Foot",      # 10
        "R_Foot",      # 11
        "Neck",        # 12
        "L_Collar",    # 13
        "R_Collar",    # 14
        "Head",        # 15
        "L_Shoulder",  # 16
        "R_Shoulder",  # 17
        "L_Elbow",     # 18
        "R_Elbow",     # 19
        "L_Wrist",     # 20
        "R_Wrist",     # 21
    ]
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def get_nturgbd_skeleton():
    """Bone list (joint-index pairs) for the 25-joint NTU-RGBD skeleton,
    traversed: left lower, left upper, spine, right lower, right upper,
    then the hand-tip/thumb links.
    """
    bones = [
        (0, 12), (12, 13), (13, 14), (14, 15),              # left leg
        (4, 20), (4, 5), (5, 6), (6, 7), (7, 21), (7, 22),  # left arm (+thumb)
        (0, 1), (1, 20), (20, 2), (2, 3),                   # spinal column
        (0, 16), (16, 17), (17, 18), (18, 19),              # right leg
        (20, 8), (8, 9), (9, 10), (10, 11), (11, 24),       # right arm (+thumb)
        (21, 22), (23, 24),                                 # hand tips
    ]
    return np.array(bones)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def get_joint_colors(joint_names):
    """Return one color per joint, sampled evenly from the 'rainbow' colormap.

    plt yields RGBA tuples in [0, 1]; channels are reordered to BGR for
    OpenCV drawing (values are kept as floats).
    """
    cmap = plt.get_cmap("rainbow")
    samples = (cmap(t) for t in np.linspace(0, 1, len(joint_names)))
    return [np.array((rgba[2], rgba[1], rgba[0])) for rgba in samples]
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def calc_angle_from_x(sk):
|
| 195 |
+
"""Given skeleton, calc. angle from x-axis"""
|
| 196 |
+
# Hip bone
|
| 197 |
+
id_l_hip = get_smpl_joint_names().index("L_Hip")
|
| 198 |
+
id_r_hip = get_smpl_joint_names().index("R_Hip")
|
| 199 |
+
pl, pr = sk[id_l_hip], sk[id_r_hip]
|
| 200 |
+
bone = np.array(pr - pl)
|
| 201 |
+
unit_v = bone / np.linalg.norm(bone)
|
| 202 |
+
# Angle with x-axis
|
| 203 |
+
pdb.set_trace()
|
| 204 |
+
x_ax = np.array([1, 0, 0])
|
| 205 |
+
x_angle = math.degrees(np.arccos(np.dot(x_ax, unit_v)))
|
| 206 |
+
|
| 207 |
+
"""
|
| 208 |
+
l_hip_z = seq[0, joint_names.index('L_Hip'), 2]
|
| 209 |
+
r_hip_z = seq[0, joint_names.index('R_Hip'), 2]
|
| 210 |
+
az = 0 if (l_hip_z > zroot and zroot > r_hip_z) else 180
|
| 211 |
+
"""
|
| 212 |
+
if bone[1] > 0:
|
| 213 |
+
x_angle = -x_angle
|
| 214 |
+
|
| 215 |
+
return x_angle
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def calc_angle_from_y(sk):
|
| 219 |
+
"""Given skeleton, calc. angle from x-axis"""
|
| 220 |
+
# Hip bone
|
| 221 |
+
id_l_hip = get_smpl_joint_names().index("L_Hip")
|
| 222 |
+
id_r_hip = get_smpl_joint_names().index("R_Hip")
|
| 223 |
+
pl, pr = sk[id_l_hip], sk[id_r_hip]
|
| 224 |
+
bone = np.array(pl - pr)
|
| 225 |
+
unit_v = bone / np.linalg.norm(bone)
|
| 226 |
+
print(unit_v)
|
| 227 |
+
# Angle with x-axis
|
| 228 |
+
pdb.set_trace()
|
| 229 |
+
y_ax = np.array([0, 1, 0])
|
| 230 |
+
y_angle = math.degrees(np.arccos(np.dot(y_ax, unit_v)))
|
| 231 |
+
|
| 232 |
+
"""
|
| 233 |
+
l_hip_z = seq[0, joint_names.index('L_Hip'), 2]
|
| 234 |
+
r_hip_z = seq[0, joint_names.index('R_Hip'), 2]
|
| 235 |
+
az = 0 if (l_hip_z > zroot and zroot > r_hip_z) else 180
|
| 236 |
+
"""
|
| 237 |
+
# if bone[1] > 0:
|
| 238 |
+
# y_angle = - y_angle
|
| 239 |
+
seq_y_proj = bone * np.cos(np.deg2rad(y_angle))
|
| 240 |
+
print("Bone projected onto y-axis: ", seq_y_proj)
|
| 241 |
+
|
| 242 |
+
return y_angle
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def viz_skeleton(
|
| 246 |
+
seq,
|
| 247 |
+
folder_p,
|
| 248 |
+
sk_type="smpl",
|
| 249 |
+
radius=1,
|
| 250 |
+
lcolor="#ff0000",
|
| 251 |
+
rcolor="#0000ff",
|
| 252 |
+
action="",
|
| 253 |
+
debug=False,
|
| 254 |
+
):
|
| 255 |
+
"""Visualize skeletons for given sequence and store as images.
|
| 256 |
+
|
| 257 |
+
Args:
|
| 258 |
+
seq (np.array): Array (frames) of joint positions.
|
| 259 |
+
Size depends on sk_type (see below).
|
| 260 |
+
if sk_type is 'smpl' then assume:
|
| 261 |
+
1. first 3 dims = translation.
|
| 262 |
+
2. Size = (# frames, 69)
|
| 263 |
+
elif sk_type is 'nturgbd', then assume:
|
| 264 |
+
1. no translation.
|
| 265 |
+
2. Size = (# frames, 25, 3)
|
| 266 |
+
folder_p (str): Path to root folder containing visualized frames.
|
| 267 |
+
Frames are dumped to the path: folder_p/frames/*.jpg
|
| 268 |
+
radius (float): Space around the subject?
|
| 269 |
+
|
| 270 |
+
Returns:
|
| 271 |
+
Stores skeleton sequence as jpg frames.
|
| 272 |
+
"""
|
| 273 |
+
joint_names = (
|
| 274 |
+
get_nturgbd_joint_names() if "nturgbd" == sk_type else get_smpl_joint_names()
|
| 275 |
+
)
|
| 276 |
+
n_j = n_j = len(joint_names)
|
| 277 |
+
|
| 278 |
+
az = 90
|
| 279 |
+
if "smpl" == sk_type:
|
| 280 |
+
# SMPL kinematic chain, joint list.
|
| 281 |
+
# NOTE that hands are skipped.
|
| 282 |
+
kin_chain = get_smpl_skeleton()
|
| 283 |
+
# Reshape flat pose features into (frames, joints, (x,y,z)) (skip trans)
|
| 284 |
+
seq = seq[:, 3:].reshape(-1, n_j, 3).cpu().detach().numpy()
|
| 285 |
+
|
| 286 |
+
elif "nturgbd" == sk_type:
|
| 287 |
+
kin_chain = get_nturgbd_skeleton()
|
| 288 |
+
az = 0
|
| 289 |
+
|
| 290 |
+
# Get color-spectrum for skeleton
|
| 291 |
+
colors = get_joint_colors(joint_names)
|
| 292 |
+
labels = [(joint_names[jidx[0]], joint_names[jidx[1]]) for jidx in kin_chain]
|
| 293 |
+
|
| 294 |
+
# xroot, yroot, zroot = 0.0, 0.0, 0.0
|
| 295 |
+
xroot, yroot, zroot = seq[0, 0, 0], seq[0, 0, 1], seq[0, 0, 2]
|
| 296 |
+
# seq = seq - seq[0, :, :]
|
| 297 |
+
|
| 298 |
+
# Change viewing angle so that first frame is in frontal pose
|
| 299 |
+
# az = calc_angle_from_x(seq[0]-np.array([xroot, yroot, zroot]))
|
| 300 |
+
# az = calc_angle_from_y(seq[0]-np.array([xroot, yroot, zroot]))
|
| 301 |
+
|
| 302 |
+
# Viz. skeleton for each frame
|
| 303 |
+
for t in range(seq.shape[0]):
|
| 304 |
+
|
| 305 |
+
# Fig. settings
|
| 306 |
+
fig = plt.figure(figsize=(7, 6)) if debug else plt.figure(figsize=(5, 5))
|
| 307 |
+
ax = fig.add_subplot(111, projection="3d")
|
| 308 |
+
|
| 309 |
+
for i, (j1, j2) in enumerate(kin_chain):
|
| 310 |
+
# Store bones
|
| 311 |
+
x = np.array([seq[t, j1, 0], seq[t, j2, 0]])
|
| 312 |
+
y = np.array([seq[t, j1, 1], seq[t, j2, 1]])
|
| 313 |
+
z = np.array([seq[t, j1, 2], seq[t, j2, 2]])
|
| 314 |
+
# Plot bones in skeleton
|
| 315 |
+
ax.plot(x, y, z, c=colors[i], marker="o", linewidth=2, label=labels[i])
|
| 316 |
+
|
| 317 |
+
# More figure settings
|
| 318 |
+
ax.set_title(action)
|
| 319 |
+
ax.set_xlabel("X")
|
| 320 |
+
ax.set_ylabel("Y")
|
| 321 |
+
ax.set_zlabel("Z")
|
| 322 |
+
# xroot, yroot, zroot = seq[t, 0, 0], seq[t, 0, 1], seq[t, 0, 2]
|
| 323 |
+
|
| 324 |
+
# pdb.set_trace()
|
| 325 |
+
ax.set_xlim3d(-radius + xroot, radius + xroot)
|
| 326 |
+
ax.set_ylim3d([-radius + yroot, radius + yroot])
|
| 327 |
+
ax.set_zlim3d([-radius + zroot, radius + zroot])
|
| 328 |
+
|
| 329 |
+
if True == debug:
|
| 330 |
+
ax.axis("on")
|
| 331 |
+
ax.grid(b=True)
|
| 332 |
+
else:
|
| 333 |
+
ax.axis("off")
|
| 334 |
+
ax.grid(b=None)
|
| 335 |
+
# Turn off tick labels
|
| 336 |
+
ax.set_yticklabels([])
|
| 337 |
+
ax.set_xticklabels([])
|
| 338 |
+
ax.set_zticklabels([])
|
| 339 |
+
|
| 340 |
+
cv2.waitKey(0)
|
| 341 |
+
|
| 342 |
+
# ax.view_init(-75, 90)
|
| 343 |
+
# ax.view_init(elev=20, azim=90+az)
|
| 344 |
+
ax.view_init(elev=20, azim=az)
|
| 345 |
+
|
| 346 |
+
if True == debug:
|
| 347 |
+
ax.legend(bbox_to_anchor=(1.1, 1), loc="upper right")
|
| 348 |
+
pass
|
| 349 |
+
|
| 350 |
+
fig.savefig(osp.join(folder_p, "frames", "{0}.jpg".format(t)))
|
| 351 |
+
plt.close(fig)
|
| 352 |
+
|
| 353 |
+
# break
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def write_vid_from_imgs(folder_p, fps):
|
| 357 |
+
"""Collate frames into a video sequence.
|
| 358 |
+
|
| 359 |
+
Args:
|
| 360 |
+
folder_p (str): Frame images are in the path: folder_p/frames/<int>.jpg
|
| 361 |
+
fps (float): Output frame rate.
|
| 362 |
+
|
| 363 |
+
Returns:
|
| 364 |
+
Output video is stored in the path: folder_p/video.mp4
|
| 365 |
+
"""
|
| 366 |
+
vid_p = osp.join(folder_p, "video.mp4")
|
| 367 |
+
cmd = [
|
| 368 |
+
"ffmpeg",
|
| 369 |
+
"-r",
|
| 370 |
+
str(int(fps)),
|
| 371 |
+
"-i",
|
| 372 |
+
osp.join(folder_p, "frames", "%d.jpg"),
|
| 373 |
+
"-y",
|
| 374 |
+
vid_p,
|
| 375 |
+
]
|
| 376 |
+
FNULL = open(os.devnull, "w")
|
| 377 |
+
retcode = subprocess.call(cmd, stdout=FNULL, stderr=subprocess.STDOUT)
|
| 378 |
+
if not 0 == retcode:
|
| 379 |
+
print(
|
| 380 |
+
"*******ValueError(Error {0} executing command: {1}*********".format(
|
| 381 |
+
retcode, " ".join(cmd)
|
| 382 |
+
)
|
| 383 |
+
)
|
| 384 |
+
shutil.rmtree(osp.join(folder_p, "frames"))
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def viz_seq(seq, folder_p, sk_type, orig_fps=30.0, debug=False):
|
| 388 |
+
"""1. Dumps sequence of skeleton images for the given sequence of joints.
|
| 389 |
+
2. Collates the sequence of images into an mp4 video.
|
| 390 |
+
|
| 391 |
+
Args:
|
| 392 |
+
seq (np.array): Array of joint positions.
|
| 393 |
+
folder_p (str): Path to root folder that will contain frames folder.
|
| 394 |
+
sk_type (str): {'smpl', 'nturgbd'}
|
| 395 |
+
|
| 396 |
+
Return:
|
| 397 |
+
None. Path of mp4 video: folder_p/video.mp4
|
| 398 |
+
"""
|
| 399 |
+
# Delete folder if exists
|
| 400 |
+
if osp.exists(folder_p):
|
| 401 |
+
print("Deleting existing folder ", folder_p)
|
| 402 |
+
shutil.rmtree(folder_p)
|
| 403 |
+
|
| 404 |
+
# Create folder for frames
|
| 405 |
+
os.makedirs(osp.join(folder_p, "frames"))
|
| 406 |
+
|
| 407 |
+
# Dump frames into folder. Args: (data, radius, frames path)
|
| 408 |
+
viz_skeleton(seq, folder_p=folder_p, sk_type=sk_type, radius=1.2, debug=debug)
|
| 409 |
+
write_vid_from_imgs(folder_p, orig_fps)
|
| 410 |
+
|
| 411 |
+
return None
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
def viz_rand_seq(X, Y, dtype, epoch, wb, urls=None, k=3, pred_labels=None):
|
| 415 |
+
"""
|
| 416 |
+
Args:
|
| 417 |
+
X (np.array): Array (frames) of SMPL joint positions.
|
| 418 |
+
Y (np.array): Multiple labels for each frame in x \in X.
|
| 419 |
+
dtype (str): {'input', 'pred'}
|
| 420 |
+
k (int): # samples to viz.
|
| 421 |
+
urls (tuple): Tuple of URLs of the rendered videos from original mocap.
|
| 422 |
+
wb (dict): Wandb log dict.
|
| 423 |
+
Returns:
|
| 424 |
+
viz_ds (dict): Data structure containing all viz. info so far.
|
| 425 |
+
"""
|
| 426 |
+
import wandb
|
| 427 |
+
|
| 428 |
+
# `idx2al`: idx --> action label string
|
| 429 |
+
al2idx = dutils.read_json("data/action_label_to_idx.json")
|
| 430 |
+
idx2al = {al2idx[k]: k for k in al2idx}
|
| 431 |
+
|
| 432 |
+
# Sample k random seqs. to viz.
|
| 433 |
+
for s_idx in random.sample(list(range(X.shape[0])), k):
|
| 434 |
+
# Visualize a single seq. in path `folder_p`
|
| 435 |
+
folder_p = osp.join("viz", str(uuid.uuid4()))
|
| 436 |
+
viz_seq(seq=X[s_idx], folder_p=folder_p)
|
| 437 |
+
title = "{0} seq. {1}: ".format(dtype, s_idx)
|
| 438 |
+
acts_str = ", ".join([idx2al[l] for l in torch.unique(Y[s_idx])])
|
| 439 |
+
wb[title + urls[s_idx]] = wandb.Video(
|
| 440 |
+
osp.join(folder_p, "video.mp4"), caption="Actions: " + acts_str
|
| 441 |
+
)
|
| 442 |
+
|
| 443 |
+
if "pred" == dtype or "preds" == dtype:
|
| 444 |
+
raise NotImplementedError
|
| 445 |
+
|
| 446 |
+
print("Done viz. {0} seqs.".format(k))
|
| 447 |
+
return wb
|
pyproject.toml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.pysen]
|
| 2 |
+
version = "0.10"
|
| 3 |
+
|
| 4 |
+
[tool.pysen.lint]
|
| 5 |
+
enable_black = true
|
| 6 |
+
enable_flake8 = true
|
| 7 |
+
enable_isort = true
|
| 8 |
+
enable_mypy = true
|
| 9 |
+
mypy_preset = "strict"
|
| 10 |
+
line_length = 88
|
| 11 |
+
py_version = "py37"
|
| 12 |
+
[[tool.pysen.lint.mypy_targets]]
|
| 13 |
+
paths = ["."]
|
requirements.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
backcall==0.2.0
|
| 2 |
+
certifi==2020.12.5
|
| 3 |
+
decorator==4.4.2
|
| 4 |
+
ipdb==0.13.4
|
| 5 |
+
jedi==0.18.0
|
| 6 |
+
joblib==1.0.0
|
| 7 |
+
networkx==2.5
|
| 8 |
+
parso==0.8.1
|
| 9 |
+
pexpect==4.8.0
|
| 10 |
+
pickleshare==0.7.5
|
| 11 |
+
prompt-toolkit==3.0.10
|
| 12 |
+
protobuf==3.14.0
|
| 13 |
+
ptyprocess==0.7.0
|
| 14 |
+
Pygments==2.7.4
|
| 15 |
+
six==1.15.0
|
| 16 |
+
tensorboardX==2.1
|
| 17 |
+
threadpoolctl==2.1.0
|
| 18 |
+
tqdm==4.56.0
|
| 19 |
+
traitlets==5.0.5
|
| 20 |
+
typing-extensions==3.7.4.3
|
| 21 |
+
wcwidth==0.2.5
|
| 22 |
+
smplx==0.1.28
|
| 23 |
+
opencv-python
|
| 24 |
+
einops
|
| 25 |
+
matplotlib
|
| 26 |
+
scikit-learn
|
| 27 |
+
pandas
|
| 28 |
+
chumpy
|
train.py
ADDED
|
@@ -0,0 +1,830 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import print_function
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import inspect
|
| 18 |
+
import os
|
| 19 |
+
import pdb
|
| 20 |
+
import pickle
|
| 21 |
+
import random
|
| 22 |
+
import re
|
| 23 |
+
import shutil
|
| 24 |
+
import time
|
| 25 |
+
from collections import *
|
| 26 |
+
|
| 27 |
+
import ipdb
|
| 28 |
+
import numpy as np
|
| 29 |
+
|
| 30 |
+
# torch
|
| 31 |
+
import torch
|
| 32 |
+
import torch.backends.cudnn as cudnn
|
| 33 |
+
import torch.nn as nn
|
| 34 |
+
import torch.nn.functional as F
|
| 35 |
+
import torch.optim as optim
|
| 36 |
+
import yaml
|
| 37 |
+
from einops import rearrange, reduce, repeat
|
| 38 |
+
from evaluation.classificationMAP import getClassificationMAP as cmAP
|
| 39 |
+
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 40 |
+
from feeders.tools import collate_with_padding_multi_joint
|
| 41 |
+
from model.losses import cross_entropy_loss, mvl_loss
|
| 42 |
+
from sklearn.metrics import f1_score
|
| 43 |
+
|
| 44 |
+
# Custom
|
| 45 |
+
from tensorboardX import SummaryWriter
|
| 46 |
+
from torch.autograd import Variable
|
| 47 |
+
from torch.optim.lr_scheduler import _LRScheduler
|
| 48 |
+
from tqdm import tqdm
|
| 49 |
+
from utils.logger import Logger
|
| 50 |
+
|
| 51 |
+
def remove_prefix_from_state_dict(state_dict, prefix):
|
| 52 |
+
new_state_dict = {}
|
| 53 |
+
for k, v in state_dict.items():
|
| 54 |
+
if k.startswith(prefix):
|
| 55 |
+
print(k)
|
| 56 |
+
new_k = k[len(prefix):] # strip the prefix
|
| 57 |
+
print(new_k)
|
| 58 |
+
else:
|
| 59 |
+
print(k)
|
| 60 |
+
new_k = k
|
| 61 |
+
new_state_dict[new_k] = v
|
| 62 |
+
return new_state_dict
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def init_seed(seed):
|
| 66 |
+
torch.cuda.manual_seed_all(seed)
|
| 67 |
+
torch.manual_seed(seed)
|
| 68 |
+
np.random.seed(seed)
|
| 69 |
+
random.seed(seed)
|
| 70 |
+
torch.backends.cudnn.deterministic = True
|
| 71 |
+
torch.backends.cudnn.benchmark = False
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def get_parser():
|
| 75 |
+
# parameter priority: command line > config > default
|
| 76 |
+
parser = argparse.ArgumentParser(
|
| 77 |
+
description="Spatial Temporal Graph Convolution Network"
|
| 78 |
+
)
|
| 79 |
+
parser.add_argument(
|
| 80 |
+
"--work-dir",
|
| 81 |
+
default="./work_dir/temp",
|
| 82 |
+
help="the work folder for storing results",
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
parser.add_argument("-model_saved_name", default="")
|
| 86 |
+
parser.add_argument(
|
| 87 |
+
"--config",
|
| 88 |
+
default="./config/nturgbd-cross-view/test_bone.yaml",
|
| 89 |
+
help="path to the configuration file",
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
# processor
|
| 93 |
+
parser.add_argument("--phase", default="train", help="must be train or test")
|
| 94 |
+
|
| 95 |
+
# visulize and debug
|
| 96 |
+
parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
|
| 97 |
+
parser.add_argument(
|
| 98 |
+
"--log-interval",
|
| 99 |
+
type=int,
|
| 100 |
+
default=100,
|
| 101 |
+
help="the interval for printing messages (#iteration)",
|
| 102 |
+
)
|
| 103 |
+
parser.add_argument(
|
| 104 |
+
"--save-interval",
|
| 105 |
+
type=int,
|
| 106 |
+
default=2,
|
| 107 |
+
help="the interval for storing models (#iteration)",
|
| 108 |
+
)
|
| 109 |
+
parser.add_argument(
|
| 110 |
+
"--eval-interval",
|
| 111 |
+
type=int,
|
| 112 |
+
default=5,
|
| 113 |
+
help="the interval for evaluating models (#iteration)",
|
| 114 |
+
)
|
| 115 |
+
parser.add_argument(
|
| 116 |
+
"--print-log", type=str2bool, default=True, help="print logging or not"
|
| 117 |
+
)
|
| 118 |
+
parser.add_argument(
|
| 119 |
+
"--show-topk",
|
| 120 |
+
type=int,
|
| 121 |
+
default=[1, 5],
|
| 122 |
+
nargs="+",
|
| 123 |
+
help="which Top K accuracy will be shown",
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
# feeder
|
| 127 |
+
parser.add_argument(
|
| 128 |
+
"--feeder", default="feeder.feeder", help="data loader will be used"
|
| 129 |
+
)
|
| 130 |
+
parser.add_argument(
|
| 131 |
+
"--num-worker",
|
| 132 |
+
type=int,
|
| 133 |
+
default=32,
|
| 134 |
+
help="the number of worker for data loader",
|
| 135 |
+
)
|
| 136 |
+
parser.add_argument(
|
| 137 |
+
"--train-feeder-args",
|
| 138 |
+
default=dict(),
|
| 139 |
+
help="the arguments of data loader for training",
|
| 140 |
+
)
|
| 141 |
+
parser.add_argument(
|
| 142 |
+
"--test-feeder-args",
|
| 143 |
+
default=dict(),
|
| 144 |
+
help="the arguments of data loader for test",
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
# model
|
| 148 |
+
parser.add_argument("--model", default=None, help="the model will be used")
|
| 149 |
+
parser.add_argument(
|
| 150 |
+
"--model-args", type=dict, default=dict(), help="the arguments of model"
|
| 151 |
+
)
|
| 152 |
+
parser.add_argument(
|
| 153 |
+
"--weights", default=None, help="the weights for network initialization"
|
| 154 |
+
)
|
| 155 |
+
parser.add_argument(
|
| 156 |
+
"--ignore-weights",
|
| 157 |
+
type=str,
|
| 158 |
+
default=[],
|
| 159 |
+
nargs="+",
|
| 160 |
+
help="the name of weights which will be ignored in the initialization",
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
# optim
|
| 164 |
+
parser.add_argument(
|
| 165 |
+
"--base-lr", type=float, default=0.01, help="initial learning rate"
|
| 166 |
+
)
|
| 167 |
+
parser.add_argument(
|
| 168 |
+
"--step",
|
| 169 |
+
type=int,
|
| 170 |
+
default=[60,80],
|
| 171 |
+
nargs="+",
|
| 172 |
+
help="the epoch where optimizer reduce the learning rate",
|
| 173 |
+
)
|
| 174 |
+
|
| 175 |
+
# training
|
| 176 |
+
parser.add_argument(
|
| 177 |
+
"--device",
|
| 178 |
+
type=int,
|
| 179 |
+
default=0,
|
| 180 |
+
nargs="+",
|
| 181 |
+
help="the indexes of GPUs for training or testing",
|
| 182 |
+
)
|
| 183 |
+
parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
|
| 184 |
+
parser.add_argument(
|
| 185 |
+
"--nesterov", type=str2bool, default=False, help="use nesterov or not"
|
| 186 |
+
)
|
| 187 |
+
parser.add_argument(
|
| 188 |
+
"--batch-size", type=int, default=256, help="training batch size"
|
| 189 |
+
)
|
| 190 |
+
parser.add_argument(
|
| 191 |
+
"--test-batch-size", type=int, default=256, help="test batch size"
|
| 192 |
+
)
|
| 193 |
+
parser.add_argument(
|
| 194 |
+
"--start-epoch", type=int, default=0, help="start training from which epoch"
|
| 195 |
+
)
|
| 196 |
+
parser.add_argument(
|
| 197 |
+
"--num-epoch", type=int, default=80, help="stop training in which epoch"
|
| 198 |
+
)
|
| 199 |
+
parser.add_argument(
|
| 200 |
+
"--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
|
| 201 |
+
)
|
| 202 |
+
# loss
|
| 203 |
+
parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
|
| 204 |
+
parser.add_argument(
|
| 205 |
+
"--label_count_path",
|
| 206 |
+
default=None,
|
| 207 |
+
type=str,
|
| 208 |
+
help="Path to label counts (used in loss weighting)",
|
| 209 |
+
)
|
| 210 |
+
parser.add_argument(
|
| 211 |
+
"---beta",
|
| 212 |
+
type=float,
|
| 213 |
+
default=0.9999,
|
| 214 |
+
help="Hyperparameter for Class balanced loss",
|
| 215 |
+
)
|
| 216 |
+
parser.add_argument(
|
| 217 |
+
"--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
|
| 218 |
+
)
|
| 219 |
+
|
| 220 |
+
parser.add_argument("--only_train_part", default=False)
|
| 221 |
+
parser.add_argument("--only_train_epoch", default=0)
|
| 222 |
+
parser.add_argument("--warm_up_epoch", default=0)
|
| 223 |
+
|
| 224 |
+
parser.add_argument(
|
| 225 |
+
"--lambda-mil", default=1.0, help="balancing hyper-parameter of mil branch"
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
parser.add_argument(
|
| 229 |
+
"--class-threshold",
|
| 230 |
+
type=float,
|
| 231 |
+
default=0.1,
|
| 232 |
+
help="class threshold for rejection",
|
| 233 |
+
)
|
| 234 |
+
parser.add_argument(
|
| 235 |
+
"--start-threshold",
|
| 236 |
+
type=float,
|
| 237 |
+
default=0.03,
|
| 238 |
+
help="start threshold for action localization",
|
| 239 |
+
)
|
| 240 |
+
parser.add_argument(
|
| 241 |
+
"--end-threshold",
|
| 242 |
+
type=float,
|
| 243 |
+
default=0.055,
|
| 244 |
+
help="end threshold for action localization",
|
| 245 |
+
)
|
| 246 |
+
parser.add_argument(
|
| 247 |
+
"--threshold-interval",
|
| 248 |
+
type=float,
|
| 249 |
+
default=0.005,
|
| 250 |
+
help="threshold interval for action localization",
|
| 251 |
+
)
|
| 252 |
+
return parser
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
class Processor:
|
| 256 |
+
"""
|
| 257 |
+
Processor for Skeleton-based Action Recgnition
|
| 258 |
+
"""
|
| 259 |
+
|
| 260 |
+
def __init__(self, arg):
|
| 261 |
+
self.arg = arg
|
| 262 |
+
self.save_arg()
|
| 263 |
+
if arg.phase == "train":
|
| 264 |
+
if not arg.train_feeder_args["debug"]:
|
| 265 |
+
if os.path.isdir(arg.model_saved_name):
|
| 266 |
+
print("log_dir: ", arg.model_saved_name, "already exist")
|
| 267 |
+
# answer = input('delete it? y/n:')
|
| 268 |
+
answer = "y"
|
| 269 |
+
if answer == "y":
|
| 270 |
+
print("Deleting dir...")
|
| 271 |
+
shutil.rmtree(arg.model_saved_name)
|
| 272 |
+
print("Dir removed: ", arg.model_saved_name)
|
| 273 |
+
# input('Refresh the website of tensorboard by pressing any keys')
|
| 274 |
+
else:
|
| 275 |
+
print("Dir not removed: ", arg.model_saved_name)
|
| 276 |
+
self.train_writer = SummaryWriter(
|
| 277 |
+
os.path.join(arg.model_saved_name, "train"), "train"
|
| 278 |
+
)
|
| 279 |
+
self.val_writer = SummaryWriter(
|
| 280 |
+
os.path.join(arg.model_saved_name, "val"), "val"
|
| 281 |
+
)
|
| 282 |
+
else:
|
| 283 |
+
self.train_writer = self.val_writer = SummaryWriter(
|
| 284 |
+
os.path.join(arg.model_saved_name, "test"), "test"
|
| 285 |
+
)
|
| 286 |
+
self.global_step = 0
|
| 287 |
+
self.load_model()
|
| 288 |
+
self.load_optimizer()
|
| 289 |
+
self.load_data()
|
| 290 |
+
self.lr = self.arg.base_lr
|
| 291 |
+
self.best_acc = 0
|
| 292 |
+
self.best_per_class_acc = 0
|
| 293 |
+
self.loss_nce = torch.nn.BCELoss()
|
| 294 |
+
|
| 295 |
+
self.my_logger = Logger(
|
| 296 |
+
os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
|
| 297 |
+
)
|
| 298 |
+
self.my_logger.set_names(["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)])
|
| 299 |
+
|
| 300 |
+
def load_data(self):
|
| 301 |
+
Feeder = import_class(self.arg.feeder)
|
| 302 |
+
self.data_loader = dict()
|
| 303 |
+
if self.arg.phase == "train":
|
| 304 |
+
self.data_loader["train"] = torch.utils.data.DataLoader(
|
| 305 |
+
dataset=Feeder(**self.arg.train_feeder_args),
|
| 306 |
+
batch_size=self.arg.batch_size,
|
| 307 |
+
shuffle=True,
|
| 308 |
+
num_workers=self.arg.num_worker,
|
| 309 |
+
drop_last=True,
|
| 310 |
+
collate_fn=collate_with_padding_multi_joint,
|
| 311 |
+
)
|
| 312 |
+
self.data_loader["test"] = torch.utils.data.DataLoader(
|
| 313 |
+
dataset=Feeder(**self.arg.test_feeder_args),
|
| 314 |
+
batch_size=self.arg.test_batch_size,
|
| 315 |
+
shuffle=False,
|
| 316 |
+
num_workers=self.arg.num_worker,
|
| 317 |
+
drop_last=False,
|
| 318 |
+
collate_fn=collate_with_padding_multi_joint,
|
| 319 |
+
)
|
| 320 |
+
def load_model(self):
|
| 321 |
+
output_device = (
|
| 322 |
+
self.arg.device[0] if type(self.arg.device) is list else self.arg.device
|
| 323 |
+
)
|
| 324 |
+
self.output_device = output_device
|
| 325 |
+
Model = import_class(self.arg.model)
|
| 326 |
+
shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
|
| 327 |
+
# print(Model)
|
| 328 |
+
self.model = Model(**self.arg.model_args).cuda(output_device)
|
| 329 |
+
# print(self.model)
|
| 330 |
+
self.loss_type = arg.loss
|
| 331 |
+
|
| 332 |
+
if self.arg.weights:
|
| 333 |
+
# if False:
|
| 334 |
+
# self.global_step = int(arg.weights[:-3].split("-")[-1])
|
| 335 |
+
self.print_log("Load weights from {}.".format(self.arg.weights))
|
| 336 |
+
if ".pkl" in self.arg.weights:
|
| 337 |
+
with open(self.arg.weights, "r") as f:
|
| 338 |
+
weights = pickle.load(f)
|
| 339 |
+
else:
|
| 340 |
+
weights = torch.load(self.arg.weights)
|
| 341 |
+
|
| 342 |
+
weights = OrderedDict(
|
| 343 |
+
[
|
| 344 |
+
[k.split("module.")[-1], v.cuda(output_device)]
|
| 345 |
+
for k, v in weights.items()
|
| 346 |
+
]
|
| 347 |
+
)
|
| 348 |
+
weights = remove_prefix_from_state_dict(weights, 'encoder_q.agcn.')
|
| 349 |
+
keys = list(weights.keys())
|
| 350 |
+
self.arg.ignore_weights = ['encoder_q','encoder_q.relation','data_bn','fc','encoder_k','queue','queue_ptr','value_transform']
|
| 351 |
+
for w in self.arg.ignore_weights:
|
| 352 |
+
for key in keys:
|
| 353 |
+
if w in key:
|
| 354 |
+
if weights.pop(key, None) is not None:
|
| 355 |
+
self.print_log(
|
| 356 |
+
"Sucessfully Remove Weights: {}.".format(key)
|
| 357 |
+
)
|
| 358 |
+
else:
|
| 359 |
+
self.print_log("Can Not Remove Weights: {}.".format(key))
|
| 360 |
+
|
| 361 |
+
try:
|
| 362 |
+
self.model.load_state_dict(weights)
|
| 363 |
+
except:
|
| 364 |
+
state = self.model.state_dict()
|
| 365 |
+
diff = list(set(state.keys()).difference(set(weights.keys())))
|
| 366 |
+
print("Can not find these weights:")
|
| 367 |
+
for d in diff:
|
| 368 |
+
print(" " + d)
|
| 369 |
+
state.update(weights)
|
| 370 |
+
self.model.load_state_dict(state)
|
| 371 |
+
|
| 372 |
+
if type(self.arg.device) is list:
|
| 373 |
+
if len(self.arg.device) > 1:
|
| 374 |
+
self.model = nn.DataParallel(
|
| 375 |
+
self.model, device_ids=self.arg.device, output_device=output_device
|
| 376 |
+
)
|
| 377 |
+
|
| 378 |
+
# # def load_model(self):
|
| 379 |
+
# output_device = (
|
| 380 |
+
# self.arg.device[0] if type(self.arg.device) is list else self.arg.device
|
| 381 |
+
# )
|
| 382 |
+
# self.output_device = output_device
|
| 383 |
+
# Model = import_class(self.arg.model)
|
| 384 |
+
# shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
|
| 385 |
+
# # print(Model)
|
| 386 |
+
# self.model = Model(**self.arg.model_args).cuda(output_device)
|
| 387 |
+
# # print(self.model)
|
| 388 |
+
# self.loss_type = arg.loss
|
| 389 |
+
|
| 390 |
+
# if self.arg.weights:
|
| 391 |
+
# # self.global_step = int(arg.weights[:-3].split("-")[-1])
|
| 392 |
+
# self.print_log("Load weights from {}.".format(self.arg.weights))
|
| 393 |
+
# if ".pkl" in self.arg.weights:
|
| 394 |
+
# with open(self.arg.weights, "r") as f:
|
| 395 |
+
# weights = pickle.load(f)
|
| 396 |
+
# else:
|
| 397 |
+
# weights = torch.load(self.arg.weights)
|
| 398 |
+
|
| 399 |
+
# weights = OrderedDict(
|
| 400 |
+
# [
|
| 401 |
+
# [k.split("module.")[-1], v.cuda(output_device)]
|
| 402 |
+
# for k, v in weights.items()
|
| 403 |
+
# ]
|
| 404 |
+
# )
|
| 405 |
+
|
| 406 |
+
# keys = list(weights.keys())
|
| 407 |
+
# for w in self.arg.ignore_weights:
|
| 408 |
+
# for key in keys:
|
| 409 |
+
# if w in key:
|
| 410 |
+
# if weights.pop(key, None) is not None:
|
| 411 |
+
# self.print_log(
|
| 412 |
+
# "Sucessfully Remove Weights: {}.".format(key)
|
| 413 |
+
# )
|
| 414 |
+
# else:
|
| 415 |
+
# self.print_log("Can Not Remove Weights: {}.".format(key))
|
| 416 |
+
|
| 417 |
+
# try:
|
| 418 |
+
# self.model.load_state_dict(weights)
|
| 419 |
+
# except:
|
| 420 |
+
# state = self.model.state_dict()
|
| 421 |
+
# diff = list(set(state.keys()).difference(set(weights.keys())))
|
| 422 |
+
# print("Can not find these weights:")
|
| 423 |
+
# for d in diff:
|
| 424 |
+
# print(" " + d)
|
| 425 |
+
# state.update(weights)
|
| 426 |
+
# self.model.load_state_dict(state)
|
| 427 |
+
|
| 428 |
+
# if type(self.arg.device) is list:
|
| 429 |
+
# if len(self.arg.device) > 1:
|
| 430 |
+
# self.model = nn.DataParallel(
|
| 431 |
+
# self.model, device_ids=self.arg.device, output_device=output_device
|
| 432 |
+
# )
|
| 433 |
+
|
| 434 |
+
def load_optimizer(self):
|
| 435 |
+
if self.arg.optimizer == "SGD":
|
| 436 |
+
self.optimizer = optim.SGD(
|
| 437 |
+
self.model.parameters(),
|
| 438 |
+
lr=self.arg.base_lr,
|
| 439 |
+
momentum=0.9,
|
| 440 |
+
nesterov=self.arg.nesterov,
|
| 441 |
+
weight_decay=self.arg.weight_decay,
|
| 442 |
+
)
|
| 443 |
+
elif self.arg.optimizer == "Adam":
|
| 444 |
+
self.optimizer = optim.Adam(
|
| 445 |
+
self.model.parameters(),
|
| 446 |
+
lr=self.arg.base_lr,
|
| 447 |
+
weight_decay=self.arg.weight_decay,
|
| 448 |
+
)
|
| 449 |
+
else:
|
| 450 |
+
raise ValueError()
|
| 451 |
+
|
| 452 |
+
def save_arg(self):
|
| 453 |
+
# save arg
|
| 454 |
+
arg_dict = vars(self.arg)
|
| 455 |
+
if not os.path.exists(self.arg.work_dir):
|
| 456 |
+
os.makedirs(self.arg.work_dir)
|
| 457 |
+
with open("{}/config.yaml".format(self.arg.work_dir), "w") as f:
|
| 458 |
+
yaml.dump(arg_dict, f)
|
| 459 |
+
|
| 460 |
+
def adjust_learning_rate(self, epoch):
|
| 461 |
+
if self.arg.optimizer == "SGD" or self.arg.optimizer == "Adam":
|
| 462 |
+
if epoch < self.arg.warm_up_epoch:
|
| 463 |
+
lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch
|
| 464 |
+
else:
|
| 465 |
+
lr = self.arg.base_lr * (
|
| 466 |
+
0.1 ** np.sum(epoch >= np.array(self.arg.step))
|
| 467 |
+
)
|
| 468 |
+
for param_group in self.optimizer.param_groups:
|
| 469 |
+
param_group["lr"] = lr
|
| 470 |
+
|
| 471 |
+
return lr
|
| 472 |
+
else:
|
| 473 |
+
raise ValueError()
|
| 474 |
+
|
| 475 |
+
def print_time(self):
|
| 476 |
+
localtime = time.asctime(time.localtime(time.time()))
|
| 477 |
+
self.print_log("Local current time : " + localtime)
|
| 478 |
+
|
| 479 |
+
def print_log(self, str, print_time=True):
|
| 480 |
+
if print_time:
|
| 481 |
+
localtime = time.asctime(time.localtime(time.time()))
|
| 482 |
+
str = "[ " + localtime + " ] " + str
|
| 483 |
+
print(str)
|
| 484 |
+
if self.arg.print_log:
|
| 485 |
+
with open("{}/print_log.txt".format(self.arg.work_dir), "a") as f:
|
| 486 |
+
print(str, file=f)
|
| 487 |
+
|
| 488 |
+
def record_time(self):
|
| 489 |
+
self.cur_time = time.time()
|
| 490 |
+
return self.cur_time
|
| 491 |
+
|
| 492 |
+
def split_time(self):
|
| 493 |
+
split_time = time.time() - self.cur_time
|
| 494 |
+
self.record_time()
|
| 495 |
+
return split_time
|
| 496 |
+
|
| 497 |
+
    def train(self, epoch, wb_dict, save_model=False):
        """Run one training epoch over self.data_loader["train"].

        Two-stream weakly-supervised training: video-level BCE (loss_nce) on
        both MIL streams plus a mutual-view consistency loss (mvl_loss); after
        epoch 10 a frame-level cross-entropy against the feeder's soft labels
        is added and the MIL weight is hard-coded to 0.1.

        Args:
            epoch: 0-based epoch index (logged as epoch + 1).
            wb_dict: metrics dict, updated in place with "train loss"/"train acc".
            save_model: when True, save a CPU state_dict checkpoint at the end.

        Returns:
            The updated wb_dict.
        """
        self.model.train()
        self.print_log("Training epoch: {}".format(epoch + 1))
        loader = self.data_loader["train"]
        self.adjust_learning_rate(epoch)

        # NOTE(review): batch_acc is allocated but never filled in this method.
        loss_value, batch_acc = [], []
        self.train_writer.add_scalar("epoch", epoch, self.global_step)
        self.record_time()
        timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
        process = tqdm(loader)
        # Optionally freeze/unfreeze only the adaptive-graph "PA" parameters
        # depending on the epoch (partial-training schedule).
        if self.arg.only_train_part:
            if epoch > self.arg.only_train_epoch:
                print("only train part, require grad")
                for key, value in self.model.named_parameters():
                    if "PA" in key:
                        value.requires_grad = True
            else:
                print("only train part, do not require grad")
                for key, value in self.model.named_parameters():
                    if "PA" in key:
                        value.requires_grad = False

        # Per-epoch accumulators for classification mAP and soft-label update.
        vid_preds = []
        frm_preds = []
        vid_lens = []
        labels = []

        results = []
        indexs = []

        # `target` is yielded by the collate function but unused here.
        for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
            process
        ):

            self.global_step += 1
            # get data
            data = data.float().cuda(self.output_device)
            label = label.cuda(self.output_device)
            mask = mask.cuda(self.output_device)
            soft_label = soft_label.cuda(self.output_device)
            timer["dataloader"] += self.split_time()

            # Remember dataset indices so label_update() can match predictions
            # back to their samples at the end of the epoch.
            indexs.extend(index.cpu().numpy().tolist())

            # Append an extra all-ones column to the multi-label targets
            # (presumably a background/ambiguous class — confirm with model).
            ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

            # forward
            mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)

            # Video-level BCE on both streams.
            cls_mil_loss = self.loss_nce(mil_pred, ab_labels.float()) + self.loss_nce(
                mil_pred_2, ab_labels.float()
            )

            # Hard-coded warm phase: frame-level soft-label supervision only
            # kicks in after epoch 10, and the MIL weight switches to 0.1.
            if epoch > 10:

                frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
                frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
                soft_label = rearrange(soft_label, "n t c -> (n t) c")

                loss = cls_mil_loss * 0.1 + mvl_loss(
                    frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
                )

                loss += cross_entropy_loss(
                    frm_scrs_re, soft_label
                ) + cross_entropy_loss(frm_scrs_2_re, soft_label)

            else:
                loss = cls_mil_loss * self.arg.lambda_mil + mvl_loss(
                    frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
                )

            # Collect per-sample predictions (truncated to the unpadded length).
            for i in range(data.size(0)):
                frm_scr = frm_scrs[i]

                label_ = label[i].cpu().numpy()
                mask_ = mask[i].cpu().numpy()
                # Number of valid (non-padding) frames for this sample.
                vid_len = mask_.sum()

                frm_pred = F.softmax(frm_scr, -1).detach().cpu().numpy()[:vid_len]
                vid_pred = mil_pred[i].detach().cpu().numpy()

                results.append(frm_pred)

                vid_preds.append(vid_pred)
                frm_preds.append(frm_pred)
                vid_lens.append(vid_len)
                labels.append(label_)

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # NOTE(review): loss.data is legacy autograd style; loss.item() is
            # the modern equivalent.
            loss_value.append(loss.data.item())
            timer["model"] += self.split_time()

        # NOTE(review): frm_preds rows have different lengths, so this becomes
        # a ragged object array — confirm downstream consumers expect that.
        vid_preds = np.array(vid_preds)
        frm_preds = np.array(frm_preds)
        vid_lens = np.array(vid_lens)
        labels = np.array(labels)

        # Feeder refreshes its per-frame soft labels from this epoch's predictions.
        loader.dataset.label_update(results, indexs)

        # Video-level classification mAP over the whole epoch.
        cmap = cmAP(vid_preds, labels)

        self.train_writer.add_scalar("acc", cmap, self.global_step)
        self.train_writer.add_scalar("loss", np.mean(loss_value), self.global_step)

        # statistics
        self.lr = self.optimizer.param_groups[0]["lr"]
        self.train_writer.add_scalar("lr", self.lr, self.global_step)
        timer["statistics"] += self.split_time()

        # statistics of time consumption and loss
        self.print_log("\tMean training loss: {:.4f}.".format(np.mean(loss_value)))
        self.print_log("\tAcc score: {:.3f}%".format(cmap))

        # Log
        wb_dict["train loss"] = np.mean(loss_value)
        wb_dict["train acc"] = cmap

        if save_model:
            # Strip a possible DataParallel "module." prefix and store on CPU.
            state_dict = self.model.state_dict()
            weights = OrderedDict(
                [[k.split("module.")[-1], v.cpu()] for k, v in state_dict.items()]
            )

            torch.save(
                weights,
                self.arg.model_saved_name + str(epoch) + ".pt",
            )

        return wb_dict
|
| 632 |
+
|
| 633 |
+
@torch.no_grad()
|
| 634 |
+
def eval(
|
| 635 |
+
self,
|
| 636 |
+
epoch,
|
| 637 |
+
wb_dict,
|
| 638 |
+
loader_name=["test"],
|
| 639 |
+
):
|
| 640 |
+
self.model.eval()
|
| 641 |
+
self.print_log("Eval epoch: {}".format(epoch + 1))
|
| 642 |
+
|
| 643 |
+
vid_preds = []
|
| 644 |
+
frm_preds = []
|
| 645 |
+
vid_lens = []
|
| 646 |
+
labels = []
|
| 647 |
+
|
| 648 |
+
for ln in loader_name:
|
| 649 |
+
loss_value = []
|
| 650 |
+
step = 0
|
| 651 |
+
process = tqdm(self.data_loader[ln])
|
| 652 |
+
|
| 653 |
+
for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
|
| 654 |
+
process
|
| 655 |
+
):
|
| 656 |
+
data = data.float().cuda(self.output_device)
|
| 657 |
+
label = label.cuda(self.output_device)
|
| 658 |
+
mask = mask.cuda(self.output_device)
|
| 659 |
+
|
| 660 |
+
ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)
|
| 661 |
+
|
| 662 |
+
# forward
|
| 663 |
+
mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)
|
| 664 |
+
|
| 665 |
+
cls_mil_loss = self.loss_nce(
|
| 666 |
+
mil_pred, ab_labels.float()
|
| 667 |
+
) + self.loss_nce(mil_pred_2, ab_labels.float())
|
| 668 |
+
|
| 669 |
+
loss_co = mvl_loss(frm_scrs, frm_scrs_2, rate=0.2, weight=0.5)
|
| 670 |
+
|
| 671 |
+
loss = cls_mil_loss * self.arg.lambda_mil + loss_co
|
| 672 |
+
|
| 673 |
+
loss_value.append(loss.data.item())
|
| 674 |
+
|
| 675 |
+
for i in range(data.size(0)):
|
| 676 |
+
frm_scr = frm_scrs[i]
|
| 677 |
+
vid_pred = mil_pred[i]
|
| 678 |
+
|
| 679 |
+
label_ = label[i].cpu().numpy()
|
| 680 |
+
mask_ = mask[i].cpu().numpy()
|
| 681 |
+
vid_len = mask_.sum()
|
| 682 |
+
|
| 683 |
+
frm_pred = F.softmax(frm_scr, -1).cpu().numpy()[:vid_len]
|
| 684 |
+
vid_pred = vid_pred.cpu().numpy()
|
| 685 |
+
|
| 686 |
+
vid_preds.append(vid_pred)
|
| 687 |
+
frm_preds.append(frm_pred)
|
| 688 |
+
vid_lens.append(vid_len)
|
| 689 |
+
labels.append(label_)
|
| 690 |
+
|
| 691 |
+
step += 1
|
| 692 |
+
|
| 693 |
+
vid_preds = np.array(vid_preds)
|
| 694 |
+
frm_preds = np.array(frm_preds)
|
| 695 |
+
vid_lens = np.array(vid_lens)
|
| 696 |
+
labels = np.array(labels)
|
| 697 |
+
|
| 698 |
+
cmap = cmAP(vid_preds, labels)
|
| 699 |
+
|
| 700 |
+
score = cmap
|
| 701 |
+
loss = np.mean(loss_value)
|
| 702 |
+
|
| 703 |
+
dmap, iou = dsmAP(
|
| 704 |
+
vid_preds,
|
| 705 |
+
frm_preds,
|
| 706 |
+
vid_lens,
|
| 707 |
+
self.arg.test_feeder_args["data_path"],
|
| 708 |
+
self.arg,
|
| 709 |
+
multi=True,
|
| 710 |
+
)
|
| 711 |
+
|
| 712 |
+
print("Classification map %f" % cmap)
|
| 713 |
+
for item in list(zip(iou, dmap)):
|
| 714 |
+
print("Detection map @ %f = %f" % (item[0], item[1]))
|
| 715 |
+
|
| 716 |
+
self.my_logger.append([epoch + 1, cmap] + dmap)
|
| 717 |
+
|
| 718 |
+
wb_dict["val loss"] = loss
|
| 719 |
+
wb_dict["val acc"] = score
|
| 720 |
+
|
| 721 |
+
if score > self.best_acc:
|
| 722 |
+
self.best_acc = score
|
| 723 |
+
|
| 724 |
+
print("Acc score: ", score, " model: ", self.arg.model_saved_name)
|
| 725 |
+
if self.arg.phase == "train":
|
| 726 |
+
self.val_writer.add_scalar("loss", loss, self.global_step)
|
| 727 |
+
self.val_writer.add_scalar("acc", score, self.global_step)
|
| 728 |
+
|
| 729 |
+
self.print_log(
|
| 730 |
+
"\tMean {} loss of {} batches: {}.".format(
|
| 731 |
+
ln, len(self.data_loader[ln]), np.mean(loss_value)
|
| 732 |
+
)
|
| 733 |
+
)
|
| 734 |
+
self.print_log("\tAcc score: {:.3f}%".format(score))
|
| 735 |
+
|
| 736 |
+
return wb_dict
|
| 737 |
+
|
| 738 |
+
def start(self):
|
| 739 |
+
wb_dict = {}
|
| 740 |
+
if self.arg.phase == "train":
|
| 741 |
+
self.print_log("Parameters:\n{}\n".format(str(vars(self.arg))))
|
| 742 |
+
self.global_step = (
|
| 743 |
+
self.arg.start_epoch
|
| 744 |
+
* len(self.data_loader["train"])
|
| 745 |
+
/ self.arg.batch_size
|
| 746 |
+
)
|
| 747 |
+
|
| 748 |
+
for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
|
| 749 |
+
|
| 750 |
+
save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
|
| 751 |
+
epoch + 1 == self.arg.num_epoch
|
| 752 |
+
)
|
| 753 |
+
wb_dict = {"lr": self.lr}
|
| 754 |
+
|
| 755 |
+
# Train
|
| 756 |
+
wb_dict = self.train(epoch, wb_dict, save_model=save_model)
|
| 757 |
+
|
| 758 |
+
# Eval. on val set
|
| 759 |
+
wb_dict = self.eval(epoch, wb_dict, loader_name=["test"])
|
| 760 |
+
# Log stats. for this epoch
|
| 761 |
+
print("Epoch: {0}\nMetrics: {1}".format(epoch, wb_dict))
|
| 762 |
+
|
| 763 |
+
print(
|
| 764 |
+
"best accuracy: ",
|
| 765 |
+
self.best_acc,
|
| 766 |
+
" model_name: ",
|
| 767 |
+
self.arg.model_saved_name,
|
| 768 |
+
)
|
| 769 |
+
|
| 770 |
+
elif self.arg.phase == "test":
|
| 771 |
+
if not self.arg.test_feeder_args["debug"]:
|
| 772 |
+
wf = self.arg.model_saved_name + "_wrong.txt"
|
| 773 |
+
rf = self.arg.model_saved_name + "_right.txt"
|
| 774 |
+
else:
|
| 775 |
+
wf = rf = None
|
| 776 |
+
if self.arg.weights is None:
|
| 777 |
+
raise ValueError("Please appoint --weights.")
|
| 778 |
+
self.arg.print_log = False
|
| 779 |
+
self.print_log("Model: {}.".format(self.arg.model))
|
| 780 |
+
self.print_log("Weights: {}.".format(self.arg.weights))
|
| 781 |
+
|
| 782 |
+
wb_dict = self.eval(
|
| 783 |
+
epoch=0,
|
| 784 |
+
wb_dict=wb_dict,
|
| 785 |
+
loader_name=["test"],
|
| 786 |
+
wrong_file=wf,
|
| 787 |
+
result_file=rf,
|
| 788 |
+
)
|
| 789 |
+
print("Inference metrics: ", wb_dict)
|
| 790 |
+
self.print_log("Done.\n")
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
def str2bool(v):
    """Parse a command-line string into a bool (argparse ``type=`` helper).

    Raises argparse.ArgumentTypeError for unrecognized values.
    """
    normalized = v.lower()
    if normalized in ("yes", "true", "t", "y", "1"):
        return True
    if normalized in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
| 800 |
+
|
| 801 |
+
|
| 802 |
+
def import_class(name):
    """Resolve a dotted path such as "model.agcn.Model" to the object it names."""
    parts = name.split(".")
    obj = __import__(parts[0])
    for attr in parts[1:]:
        obj = getattr(obj, attr)
    return obj
|
| 808 |
+
|
| 809 |
+
|
| 810 |
+
if __name__ == "__main__":
    parser = get_parser()

    # load arg form config file
    # First parse picks up --config; YAML keys then become new defaults so
    # that explicit command-line flags still win on the second parse.
    p = parser.parse_args()
    if p.config is not None:
        with open(p.config, "r") as f:
            default_arg = yaml.safe_load(f)
        key = vars(p).keys()
        # Every YAML key must correspond to a declared argparse dest.
        for k in default_arg.keys():
            if k not in key:
                print("WRONG ARG: {}".format(k))
            assert k in key
        parser.set_defaults(**default_arg)

    # Re-parse: command line > config file > parser defaults.
    arg = parser.parse_args()
    print("BABEL Action Recognition")
    print("Config: ", arg)
    init_seed(arg.seed)
    processor = Processor(arg)
    processor.start()
|
train_full.py
ADDED
|
@@ -0,0 +1,788 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from __future__ import print_function
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import inspect
|
| 6 |
+
import os
|
| 7 |
+
import pdb
|
| 8 |
+
import pickle
|
| 9 |
+
import random
|
| 10 |
+
import re
|
| 11 |
+
import shutil
|
| 12 |
+
import time
|
| 13 |
+
from collections import *
|
| 14 |
+
|
| 15 |
+
import ipdb
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
# torch
|
| 19 |
+
import torch
|
| 20 |
+
import torch.backends.cudnn as cudnn
|
| 21 |
+
import torch.nn as nn
|
| 22 |
+
import torch.nn.functional as F
|
| 23 |
+
import torch.optim as optim
|
| 24 |
+
import yaml
|
| 25 |
+
from einops import rearrange, reduce, repeat
|
| 26 |
+
from evaluation.classificationMAP import getClassificationMAP as cmAP
|
| 27 |
+
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 28 |
+
from feeders.tools import collate_with_padding_multi_joint
|
| 29 |
+
from model.losses import cross_entropy_loss, mvl_loss
|
| 30 |
+
from sklearn.metrics import f1_score
|
| 31 |
+
|
| 32 |
+
# Custom
|
| 33 |
+
from tensorboardX import SummaryWriter
|
| 34 |
+
from torch.autograd import Variable
|
| 35 |
+
from torch.optim.lr_scheduler import _LRScheduler
|
| 36 |
+
from tqdm import tqdm
|
| 37 |
+
from utils.logger import Logger
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# seed = 0
|
| 41 |
+
# random.seed(seed)
|
| 42 |
+
# np.random.seed(seed)
|
| 43 |
+
# torch.manual_seed(seed)
|
| 44 |
+
# torch.cuda.manual_seed_all(seed)
|
| 45 |
+
# torch.use_deterministic_algorithms(True)
|
| 46 |
+
# torch.backends.cudnn.deterministic = True
|
| 47 |
+
# torch.backends.cudnn.benchmark = False
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def init_seed(seed):
    """Seed all RNGs used in training (python, numpy, torch CPU/GPU) and make cuDNN deterministic."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def get_parser():
    """Build the argparse parser for the training scripts.

    Values resolve with priority: command line > YAML config (--config,
    applied via set_defaults in __main__) > the defaults declared here.
    """
    # parameter priority: command line > config > default
    parser = argparse.ArgumentParser(
        description="Spatial Temporal Graph Convolution Network"
    )
    parser.add_argument(
        "--work-dir",
        default="./work_dir/temp",
        help="the work folder for storing results",
    )

    # NOTE(review): single dash, so this is spelled "-model_saved_name".
    parser.add_argument("-model_saved_name", default="")
    parser.add_argument(
        "--config",
        default="./config/nturgbd-cross-view/test_bone.yaml",
        help="path to the configuration file",
    )

    # processor
    parser.add_argument("--phase", default="train", help="must be train or test")

    # visulize and debug
    parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="the interval for printing messages (#iteration)",
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=2,
        help="the interval for storing models (#iteration)",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=5,
        help="the interval for evaluating models (#iteration)",
    )
    parser.add_argument(
        "--print-log", type=str2bool, default=True, help="print logging or not"
    )
    parser.add_argument(
        "--show-topk",
        type=int,
        default=[1, 5],
        nargs="+",
        help="which Top K accuracy will be shown",
    )

    # feeder
    parser.add_argument(
        "--feeder", default="feeder.feeder", help="data loader will be used"
    )
    parser.add_argument(
        "--num-worker",
        type=int,
        default=32,
        help="the number of worker for data loader",
    )
    # These dict-valued args are normally supplied via the YAML config,
    # not on the command line.
    parser.add_argument(
        "--train-feeder-args",
        default=dict(),
        help="the arguments of data loader for training",
    )
    parser.add_argument(
        "--test-feeder-args",
        default=dict(),
        help="the arguments of data loader for test",
    )

    # model
    parser.add_argument("--model", default=None, help="the model will be used")
    parser.add_argument(
        "--model-args", type=dict, default=dict(), help="the arguments of model"
    )
    parser.add_argument(
        "--weights", default=None, help="the weights for network initialization"
    )
    parser.add_argument(
        "--ignore-weights",
        type=str,
        default=[],
        nargs="+",
        help="the name of weights which will be ignored in the initialization",
    )

    # optim
    parser.add_argument(
        "--base-lr", type=float, default=0.01, help="initial learning rate"
    )
    parser.add_argument(
        "--step",
        type=int,
        default=[60,80],
        nargs="+",
        help="the epoch where optimizer reduce the learning rate",
    )

    # training
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        nargs="+",
        help="the indexes of GPUs for training or testing",
    )
    parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
    parser.add_argument(
        "--nesterov", type=str2bool, default=False, help="use nesterov or not"
    )
    parser.add_argument(
        "--batch-size", type=int, default=256, help="training batch size"
    )
    parser.add_argument(
        "--test-batch-size", type=int, default=256, help="test batch size"
    )
    parser.add_argument(
        "--start-epoch", type=int, default=0, help="start training from which epoch"
    )
    parser.add_argument(
        "--num-epoch", type=int, default=80, help="stop training in which epoch"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
    )
    # loss
    parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
    parser.add_argument(
        "--label_count_path",
        default=None,
        type=str,
        help="Path to label counts (used in loss weighting)",
    )
    # NOTE(review): three leading dashes make the argparse dest "_beta";
    # likely intended "--beta" — confirm against configs before changing,
    # since YAML keys must match the dest.
    parser.add_argument(
        "---beta",
        type=float,
        default=0.9999,
        help="Hyperparameter for Class balanced loss",
    )
    parser.add_argument(
        "--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
    )

    # Partial-training schedule for adaptive-graph ("PA") parameters.
    parser.add_argument("--only_train_part", default=False)
    parser.add_argument("--only_train_epoch", default=0)
    parser.add_argument("--warm_up_epoch", default=10)

    parser.add_argument(
        "--lambda-mil", default=1.0, help="balancing hyper-parameter of mil branch"
    )

    # Thresholds used by the detection-mAP evaluation (dsmAP).
    parser.add_argument(
        "--class-threshold",
        type=float,
        default=0.1,
        help="class threshold for rejection",
    )
    parser.add_argument(
        "--start-threshold",
        type=float,
        default=0.03,
        help="start threshold for action localization",
    )
    parser.add_argument(
        "--end-threshold",
        type=float,
        default=0.055,
        help="end threshold for action localization",
    )
    parser.add_argument(
        "--threshold-interval",
        type=float,
        default=0.005,
        help="threshold interval for action localization",
    )
    return parser
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
class Processor:
|
| 241 |
+
"""
|
| 242 |
+
Processor for Skeleton-based Action Recgnition
|
| 243 |
+
"""
|
| 244 |
+
|
| 245 |
+
    def __init__(self, arg):
        """Set up writers, model, optimizer, data loaders and the CSV logger.

        Args:
            arg: parsed argparse.Namespace (command line merged with YAML config).
        """
        self.arg = arg
        self.save_arg()
        if arg.phase == "train":
            if not arg.train_feeder_args["debug"]:
                # A previous run's tensorboard dir is wiped automatically
                # (the interactive confirmation is hard-coded to "y").
                if os.path.isdir(arg.model_saved_name):
                    print("log_dir: ", arg.model_saved_name, "already exist")
                    # answer = input('delete it? y/n:')
                    answer = "y"
                    if answer == "y":
                        print("Deleting dir...")
                        shutil.rmtree(arg.model_saved_name)
                        print("Dir removed: ", arg.model_saved_name)
                        # input('Refresh the website of tensorboard by pressing any keys')
                    else:
                        print("Dir not removed: ", arg.model_saved_name)
                self.train_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "train"), "train"
                )
                self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "val"), "val"
                )
            else:
                # Debug mode: one shared writer for both train and val.
                self.train_writer = self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "test"), "test"
                )
        # NOTE(review): when phase != "train" no writer is created here —
        # confirm eval() is never reached with phase "test" writers unset.
        self.global_step = 0
        # Order matters: the optimizer needs self.model to exist.
        self.load_model()
        self.load_optimizer()
        self.load_data()
        self.lr = self.arg.base_lr
        self.best_acc = 0
        self.best_per_class_acc = 0
        # Video-level multi-label BCE used by train()/eval().
        self.loss_nce = torch.nn.BCELoss()

        # CSV-style logger: one row per eval (step, cmap, det-mAP@0.1..0.5, avg).
        self.my_logger = Logger(
            os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
        )
        self.my_logger.set_names(["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)]+['avg'])
|
| 284 |
+
|
| 285 |
+
    def load_data(self):
        """Instantiate self.data_loader["train"/"test"] from the configured feeder.

        The train loader is only built in the "train" phase; the test loader
        is always built. Both use the padding-aware multi-joint collate fn.
        """
        Feeder = import_class(self.arg.feeder)
        self.data_loader = dict()
        if self.arg.phase == "train":
            self.data_loader["train"] = torch.utils.data.DataLoader(
                dataset=Feeder(**self.arg.train_feeder_args),
                batch_size=self.arg.batch_size,
                shuffle=True,
                num_workers=self.arg.num_worker,
                drop_last=True,  # keep batch statistics consistent
                collate_fn=collate_with_padding_multi_joint,
            )
        self.data_loader["test"] = torch.utils.data.DataLoader(
            dataset=Feeder(**self.arg.test_feeder_args),
            batch_size=self.arg.test_batch_size,
            shuffle=False,
            num_workers=self.arg.num_worker,
            drop_last=False,
            collate_fn=collate_with_padding_multi_joint,
        )
|
| 305 |
+
|
| 306 |
+
def load_model(self):
|
| 307 |
+
output_device = (
|
| 308 |
+
self.arg.device[0] if type(self.arg.device) is list else self.arg.device
|
| 309 |
+
)
|
| 310 |
+
self.output_device = output_device
|
| 311 |
+
Model = import_class(self.arg.model)
|
| 312 |
+
shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
|
| 313 |
+
# print(Model)
|
| 314 |
+
self.model = Model(**self.arg.model_args).cuda(output_device)
|
| 315 |
+
# print(self.model)
|
| 316 |
+
self.loss_type = arg.loss
|
| 317 |
+
|
| 318 |
+
if self.arg.weights:
|
| 319 |
+
# self.global_step = int(arg.weights[:-3].split("-")[-1])
|
| 320 |
+
self.print_log("Load weights from {}.".format(self.arg.weights))
|
| 321 |
+
if ".pkl" in self.arg.weights:
|
| 322 |
+
with open(self.arg.weights, "r") as f:
|
| 323 |
+
weights = pickle.load(f)
|
| 324 |
+
else:
|
| 325 |
+
weights = torch.load(self.arg.weights)
|
| 326 |
+
|
| 327 |
+
weights = OrderedDict(
|
| 328 |
+
[
|
| 329 |
+
[k.split("module.")[-1], v.cuda(output_device)]
|
| 330 |
+
for k, v in weights.items()
|
| 331 |
+
]
|
| 332 |
+
)
|
| 333 |
+
|
| 334 |
+
keys = list(weights.keys())
|
| 335 |
+
for w in self.arg.ignore_weights:
|
| 336 |
+
for key in keys:
|
| 337 |
+
if w in key:
|
| 338 |
+
if weights.pop(key, None) is not None:
|
| 339 |
+
self.print_log(
|
| 340 |
+
"Sucessfully Remove Weights: {}.".format(key)
|
| 341 |
+
)
|
| 342 |
+
else:
|
| 343 |
+
self.print_log("Can Not Remove Weights: {}.".format(key))
|
| 344 |
+
|
| 345 |
+
try:
|
| 346 |
+
self.model.load_state_dict(weights)
|
| 347 |
+
except:
|
| 348 |
+
state = self.model.state_dict()
|
| 349 |
+
diff = list(set(state.keys()).difference(set(weights.keys())))
|
| 350 |
+
print("Can not find these weights:")
|
| 351 |
+
for d in diff:
|
| 352 |
+
print(" " + d)
|
| 353 |
+
state.update(weights)
|
| 354 |
+
self.model.load_state_dict(state)
|
| 355 |
+
|
| 356 |
+
if type(self.arg.device) is list:
|
| 357 |
+
if len(self.arg.device) > 1:
|
| 358 |
+
self.model = nn.DataParallel(
|
| 359 |
+
self.model, device_ids=self.arg.device, output_device=output_device
|
| 360 |
+
)
|
| 361 |
+
|
| 362 |
+
def load_optimizer(self):
    """Build ``self.optimizer`` (SGD or Adam) from the parsed arguments.

    Reads ``self.arg.optimizer``, ``base_lr``, ``nesterov`` and
    ``weight_decay``; optimizes all parameters of ``self.model``.

    Raises:
        ValueError: if ``self.arg.optimizer`` is neither "SGD" nor "Adam".
    """
    if self.arg.optimizer == "SGD":
        self.optimizer = optim.SGD(
            self.model.parameters(),
            lr=self.arg.base_lr,
            momentum=0.9,  # fixed momentum; not exposed as a CLI flag
            nesterov=self.arg.nesterov,
            weight_decay=self.arg.weight_decay,
        )
    elif self.arg.optimizer == "Adam":
        self.optimizer = optim.Adam(
            self.model.parameters(),
            lr=self.arg.base_lr,
            weight_decay=self.arg.weight_decay,
        )
    else:
        # Fail loudly with the offending value instead of a bare ValueError.
        raise ValueError(
            "Unsupported optimizer: {!r} (expected 'SGD' or 'Adam')".format(
                self.arg.optimizer
            )
        )
|
| 379 |
+
|
| 380 |
+
def save_arg(self):
    """Persist every parsed argument to ``<work_dir>/config.yaml``.

    Creates the work directory on first use so later log writes succeed.
    """
    config = vars(self.arg)
    os.makedirs(self.arg.work_dir, exist_ok=True)
    out_path = "{}/config.yaml".format(self.arg.work_dir)
    with open(out_path, "w") as f:
        yaml.dump(config, f)
|
| 387 |
+
|
| 388 |
+
def adjust_learning_rate(self, epoch):
    """Set the learning rate for ``epoch`` and return it.

    Schedule: linear warm-up from ``base_lr / warm_up_epoch`` to ``base_lr``
    over the first ``warm_up_epoch`` epochs, then step decay multiplying by
    0.1 for every milestone in ``self.arg.step`` already passed.

    Raises:
        ValueError: if ``self.arg.optimizer`` is neither "SGD" nor "Adam".
    """
    if self.arg.optimizer == "SGD" or self.arg.optimizer == "Adam":
        if epoch < self.arg.warm_up_epoch:
            # Linear warm-up: (epoch+1)/warm_up_epoch fraction of base_lr.
            lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch
        else:
            # 0.1 ** (#milestones passed) step decay.
            lr = self.arg.base_lr * (
                0.1 ** np.sum(epoch >= np.array(self.arg.step))
            )
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = lr

        return lr
    else:
        # Name the bad value instead of raising a bare ValueError.
        raise ValueError(
            "Unsupported optimizer: {!r} (expected 'SGD' or 'Adam')".format(
                self.arg.optimizer
            )
        )
|
| 402 |
+
|
| 403 |
+
def print_time(self):
    """Log the current wall-clock time through ``print_log``."""
    now = time.asctime(time.localtime(time.time()))
    self.print_log("Local current time : " + now)
|
| 406 |
+
|
| 407 |
+
def print_log(self, str, print_time=True):
    """Print a message to stdout and, when ``self.arg.print_log`` is set,
    append it to ``<work_dir>/print_log.txt``.

    When ``print_time`` is true the message is prefixed with a timestamp.
    (Parameter name ``str`` shadows the builtin but is kept for caller
    compatibility.)
    """
    if print_time:
        stamp = time.asctime(time.localtime(time.time()))
        str = "[ " + stamp + " ] " + str
    print(str)
    if self.arg.print_log:
        log_path = "{}/print_log.txt".format(self.arg.work_dir)
        with open(log_path, "a") as f:
            print(str, file=f)
|
| 415 |
+
|
| 416 |
+
def record_time(self):
    """Remember the current timestamp (used by ``split_time``) and return it."""
    self.cur_time = time.time()
    return self.cur_time
|
| 419 |
+
|
| 420 |
+
def split_time(self):
    """Return seconds elapsed since the last ``record_time``/``split_time``
    call, and reset the reference timestamp."""
    elapsed = time.time() - self.cur_time
    self.record_time()
    return elapsed
|
| 424 |
+
|
| 425 |
+
def train(self, epoch, wb_dict, save_model=False):
    """Run one fully-supervised training epoch.

    For every batch: forwards the two-stream model, combines a video-level
    MIL (BCE) loss, a mutual-view loss between the two frame-score streams,
    and a frame-level cross-entropy against the dense ``target`` labels;
    then backprops and steps the optimizer. Accumulates per-video
    predictions to compute classification mAP and pushes the frame
    predictions back into the dataset via ``label_update``.

    Args:
        epoch: zero-based epoch index (controls lr schedule and logging).
        save_model: when true, save a CPU copy of the weights to
            ``<model_saved_name><epoch>.pt``.

    Returns:
        ``wb_dict`` updated with "train loss" and "train acc".
    """
    self.model.train()
    self.print_log("Training epoch: {}".format(epoch + 1))
    loader = self.data_loader["train"]
    self.adjust_learning_rate(epoch)

    # NOTE(review): batch_acc is accumulated nowhere — appears unused.
    loss_value, batch_acc = [], []
    self.train_writer.add_scalar("epoch", epoch, self.global_step)
    self.record_time()
    # Small non-zero seeds avoid divide-by-zero in timing ratios.
    timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
    process = tqdm(loader)
    if self.arg.only_train_part:
        # Freeze/unfreeze only the adaptive-graph "PA" parameters depending
        # on whether we are past only_train_epoch.
        if epoch > self.arg.only_train_epoch:
            print("only train part, require grad")
            for key, value in self.model.named_parameters():
                if "PA" in key:
                    value.requires_grad = True
        else:
            print("only train part, do not require grad")
            for key, value in self.model.named_parameters():
                if "PA" in key:
                    value.requires_grad = False

    # Per-video accumulators for epoch-level mAP computation.
    vid_preds = []
    frm_preds = []
    vid_lens = []
    labels = []

    # Frame predictions + dataset indices fed back to the feeder.
    results = []
    indexs = []

    '''
    Switch to FULL supervision
    Dataloader->Feeder ->collate_with_padding_multi_joint
    '''

    for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
        process
    ):

        self.global_step += 1
        # get data
        data = data.float().cuda(self.output_device)
        label = label.cuda(self.output_device)
        target = target.cuda(self.output_device)
        mask = mask.cuda(self.output_device)
        soft_label = soft_label.cuda(self.output_device)
        timer["dataloader"] += self.split_time()

        ''' into one hot'''
        # Flatten dense per-frame targets and one-hot encode them.
        # NOTE(review): num_classes is hard-coded to 5 — confirm it matches
        # the dataset split's action-class count.
        ground_truth_flat = target.view(-1)
        one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
        ''' into one hot'''

        indexs.extend(index.cpu().numpy().tolist())

        # Append a constant "background/ambient" column of ones to the
        # video-level labels for the MIL loss.
        ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

        # forward
        mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)

        # Video-level MIL BCE loss, summed over both streams.
        cls_mil_loss = self.loss_nce(mil_pred, ab_labels.float()) + self.loss_nce(
            mil_pred_2, ab_labels.float()
        )

        if epoch > -1:  # always true; kept for easy schedule experiments

            frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
            frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
            # soft_label = rearrange(soft_label, "n t c -> (n t) c")

            # MIL loss (down-weighted 0.1) + mutual-view loss between streams.
            loss = cls_mil_loss * 0.1 + mvl_loss(
                frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
            )

            # Dense frame-level supervision on both streams.
            loss += cross_entropy_loss(
                frm_scrs_re, one_hot_ground_truth
            ) + cross_entropy_loss(frm_scrs_2_re, one_hot_ground_truth)

        # else:
        #     loss = cls_mil_loss * self.arg.lambda_mil + mvl_loss(
        #         frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
        #     )

        # Collect per-video predictions truncated to the unpadded length.
        for i in range(data.size(0)):
            frm_scr = frm_scrs[i]

            label_ = label[i].cpu().numpy()
            mask_ = mask[i].cpu().numpy()
            vid_len = mask_.sum()

            frm_pred = F.softmax(frm_scr, -1).detach().cpu().numpy()[:vid_len]
            vid_pred = mil_pred[i].detach().cpu().numpy()

            results.append(frm_pred)

            vid_preds.append(vid_pred)
            frm_preds.append(frm_pred)
            vid_lens.append(vid_len)
            labels.append(label_)

        # backward
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        loss_value.append(loss.data.item())
        timer["model"] += self.split_time()

    vid_preds = np.array(vid_preds)
    frm_preds = np.array(frm_preds)
    vid_lens = np.array(vid_lens)
    labels = np.array(labels)

    # Feed frame predictions back to the dataset (pseudo-label refresh).
    loader.dataset.label_update(results, indexs)

    # Classification mean average precision over the whole epoch.
    cmap = cmAP(vid_preds, labels)

    self.train_writer.add_scalar("acc", cmap, self.global_step)
    self.train_writer.add_scalar("loss", np.mean(loss_value), self.global_step)

    # statistics
    self.lr = self.optimizer.param_groups[0]["lr"]
    self.train_writer.add_scalar("lr", self.lr, self.global_step)
    timer["statistics"] += self.split_time()

    # statistics of time consumption and loss
    self.print_log("\tMean training loss: {:.4f}.".format(np.mean(loss_value)))
    self.print_log("\tAcc score: {:.3f}%".format(cmap))

    # Log
    wb_dict["train loss"] = np.mean(loss_value)
    wb_dict["train acc"] = cmap

    if save_model:
        # Strip DataParallel's "module." prefix before saving CPU weights.
        state_dict = self.model.state_dict()
        weights = OrderedDict(
            [[k.split("module.")[-1], v.cpu()] for k, v in state_dict.items()]
        )

        torch.save(
            weights,
            self.arg.model_saved_name + str(epoch) + ".pt",
        )

    return wb_dict
|
| 572 |
+
|
| 573 |
+
@torch.no_grad()
def eval(
    self,
    epoch,
    wb_dict,
    loader_name=["test"],
):
    """Evaluate on the named data loaders (no gradients).

    Computes the same composite loss as ``train`` for monitoring, then
    classification mAP (``cmAP``) and detection mAP over IoU thresholds
    (``dsmAP``); appends a row to ``self.my_logger`` and tracks
    ``self.best_acc``.

    NOTE(review): the mutable default ``loader_name=["test"]`` is shared
    across calls — harmless here since it is never mutated, but fragile.

    Returns:
        ``wb_dict`` updated with "val loss" and "val acc".
    """
    self.model.eval()
    self.print_log("Eval epoch: {}".format(epoch + 1))

    # Per-video accumulators across all evaluated loaders.
    vid_preds = []
    frm_preds = []
    vid_lens = []
    labels = []

    for ln in loader_name:
        loss_value = []
        step = 0
        process = tqdm(self.data_loader[ln])

        for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
            process
        ):
            data = data.float().cuda(self.output_device)
            label = label.cuda(self.output_device)
            mask = mask.cuda(self.output_device)

            # Append the constant background column for the MIL loss.
            ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

            # forward
            mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)

            '''Loc LOSS'''
            target = target.cuda(self.output_device)
            ''' into one hot'''
            # NOTE(review): num_classes hard-coded to 5, same as in train().
            ground_truth_flat = target.view(-1)
            one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
            ''' into one hot'''
            frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
            frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
            '''Loc LOSS'''


            # Video-level MIL BCE loss over both streams.
            cls_mil_loss = self.loss_nce(
                mil_pred, ab_labels.float()
            ) + self.loss_nce(mil_pred_2, ab_labels.float())

            # Mutual-view consistency loss between the two frame streams.
            loss_co = mvl_loss(frm_scrs, frm_scrs_2, rate=0.2, weight=0.5)

            loss = cls_mil_loss * self.arg.lambda_mil + loss_co

            '''Loc LOSS'''
            loss += cross_entropy_loss(
                frm_scrs_re, one_hot_ground_truth
            ) + cross_entropy_loss(frm_scrs_2_re, one_hot_ground_truth)
            '''Loc LOSS'''


            loss_value.append(loss.data.item())

            # Truncate per-video frame predictions to the unpadded length.
            for i in range(data.size(0)):
                frm_scr = frm_scrs[i]
                vid_pred = mil_pred[i]

                label_ = label[i].cpu().numpy()
                mask_ = mask[i].cpu().numpy()
                vid_len = mask_.sum()

                frm_pred = F.softmax(frm_scr, -1).cpu().numpy()[:vid_len]
                vid_pred = vid_pred.cpu().numpy()

                vid_preds.append(vid_pred)
                frm_preds.append(frm_pred)
                vid_lens.append(vid_len)
                labels.append(label_)

            step += 1

        vid_preds = np.array(vid_preds)
        frm_preds = np.array(frm_preds)
        vid_lens = np.array(vid_lens)
        labels = np.array(labels)

        # Classification mAP over all videos of this loader.
        cmap = cmAP(vid_preds, labels)

        score = cmap
        loss = np.mean(loss_value)

        # Detection mAP across IoU thresholds (returns per-threshold maps).
        dmap, iou = dsmAP(
            vid_preds,
            frm_preds,
            vid_lens,
            self.arg.test_feeder_args["data_path"],
            self.arg,
            multi=True,
        )

        print("Classification map %f" % cmap)
        for item in list(zip(iou, dmap)):
            print("Detection map @ %f = %f" % (item[0], item[1]))

        self.my_logger.append([epoch + 1, cmap] + dmap+ [np.mean(dmap)])

        wb_dict["val loss"] = loss
        wb_dict["val acc"] = score

        if score > self.best_acc:
            self.best_acc = score

        print("Acc score: ", score, " model: ", self.arg.model_saved_name)
        if self.arg.phase == "train":
            self.val_writer.add_scalar("loss", loss, self.global_step)
            self.val_writer.add_scalar("acc", score, self.global_step)

        self.print_log(
            "\tMean {} loss of {} batches: {}.".format(
                ln, len(self.data_loader[ln]), np.mean(loss_value)
            )
        )
        self.print_log("\tAcc score: {:.3f}%".format(score))

    return wb_dict
|
| 695 |
+
|
| 696 |
+
def start(self):
    """Entry point: run the training loop or a single test-set evaluation.

    In "train" phase: trains for ``num_epoch`` epochs, evaluating on the
    test loader every 10th epoch and saving weights on the configured
    interval. In "test" phase: loads nothing new (weights were loaded in
    ``load_model``) and runs one evaluation pass.

    Raises:
        ValueError: in "test" phase when ``--weights`` was not supplied.
    """
    wb_dict = {}
    if self.arg.phase == "train":
        self.print_log("Parameters:\n{}\n".format(str(vars(self.arg))))
        # Resume the global step counter when starting from a later epoch.
        self.global_step = (
            self.arg.start_epoch
            * len(self.data_loader["train"])
            / self.arg.batch_size
        )

        for epoch in range(self.arg.start_epoch, self.arg.num_epoch):

            save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
                epoch + 1 == self.arg.num_epoch
            )
            wb_dict = {"lr": self.lr}

            # Train
            wb_dict = self.train(epoch, wb_dict, save_model=save_model)
            if epoch % 10 == 0:
                # Eval. on val set every 10 epochs
                wb_dict = self.eval(epoch, wb_dict, loader_name=["test"])
            # Log stats. for this epoch
            print("Epoch: {0}\nMetrics: {1}".format(epoch, wb_dict))

        print(
            "best accuracy: ",
            self.best_acc,
            " model_name: ",
            self.arg.model_saved_name,
        )

    elif self.arg.phase == "test":
        if self.arg.weights is None:
            raise ValueError("Please appoint --weights.")
        self.arg.print_log = False
        self.print_log("Model: {}.".format(self.arg.model))
        self.print_log("Weights: {}.".format(self.arg.weights))

        # BUG FIX: eval() has no wrong_file/result_file parameters, so the
        # original call self.eval(..., wrong_file=wf, result_file=rf) always
        # raised TypeError in test phase. The wf/rf paths were computed but
        # never consumed; they are removed along with the bad keyword args.
        wb_dict = self.eval(
            epoch=0,
            wb_dict=wb_dict,
            loader_name=["test"],
        )
        print("Inference metrics: ", wb_dict)
        self.print_log("Done.\n")
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
def str2bool(v):
    """Parse a yes/no-style CLI string into a bool (for argparse ``type=``).

    Raises:
        argparse.ArgumentTypeError: if the string is not a recognized
            truthy/falsy spelling.
    """
    lowered = v.lower()
    truthy = ("yes", "true", "t", "y", "1")
    falsy = ("no", "false", "f", "n", "0")
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
| 758 |
+
|
| 759 |
+
|
| 760 |
+
def import_class(name):
    """Import and return the object named by a dotted path, e.g. 'pkg.mod.Cls'."""
    parts = name.split(".")
    obj = __import__(parts[0])
    for attr in parts[1:]:
        obj = getattr(obj, attr)
    return obj
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
if __name__ == "__main__":
    parser = get_parser()

    # load arg form config file
    # First parse picks up --config; YAML values become new defaults, then a
    # second parse lets explicit command-line flags override them
    # (priority: command line > config > argparse defaults).
    p = parser.parse_args()
    if p.config is not None:
        with open(p.config, "r") as f:
            default_arg = yaml.safe_load(f)
        key = vars(p).keys()
        for k in default_arg.keys():
            # Reject YAML keys that do not correspond to a known argument.
            if k not in key:
                print("WRONG ARG: {}".format(k))
            assert k in key
        parser.set_defaults(**default_arg)

    arg = parser.parse_args()
    print("BABEL Action Recognition")
    print("Config: ", arg)
    init_seed(arg.seed)
    processor = Processor(arg)
    processor.start()
|
train_full_SSL.py
ADDED
|
@@ -0,0 +1,784 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import print_function
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import inspect
|
| 18 |
+
import os
|
| 19 |
+
import pdb
|
| 20 |
+
import pickle
|
| 21 |
+
import random
|
| 22 |
+
import re
|
| 23 |
+
import shutil
|
| 24 |
+
import time
|
| 25 |
+
from collections import *
|
| 26 |
+
|
| 27 |
+
import ipdb
|
| 28 |
+
import numpy as np
|
| 29 |
+
|
| 30 |
+
# torch
|
| 31 |
+
import torch
|
| 32 |
+
import torch.backends.cudnn as cudnn
|
| 33 |
+
import torch.nn as nn
|
| 34 |
+
import torch.nn.functional as F
|
| 35 |
+
import torch.optim as optim
|
| 36 |
+
import yaml
|
| 37 |
+
from einops import rearrange, reduce, repeat
|
| 38 |
+
from evaluation.classificationMAP import getClassificationMAP as cmAP
|
| 39 |
+
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 40 |
+
from feeders.tools import collate_with_padding_multi_joint
|
| 41 |
+
from model.losses import cross_entropy_loss, mvl_loss
|
| 42 |
+
from sklearn.metrics import f1_score
|
| 43 |
+
|
| 44 |
+
# Custom
|
| 45 |
+
from tensorboardX import SummaryWriter
|
| 46 |
+
from torch.autograd import Variable
|
| 47 |
+
from torch.optim.lr_scheduler import _LRScheduler
|
| 48 |
+
from tqdm import tqdm
|
| 49 |
+
from utils.logger import Logger
|
| 50 |
+
|
| 51 |
+
def remove_prefix_from_state_dict(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from every
    key that starts with it; other keys are kept unchanged."""
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def init_seed(seed):
    """Seed all RNGs (python, numpy, torch CPU + all GPUs) and force
    deterministic cudnn behavior (disables autotuning)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Reproducibility at the cost of cudnn's benchmark autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def get_parser():
    """Build the argparse parser for training/evaluation.

    Parameter priority when the script runs: command line > config file
    (YAML, applied via ``set_defaults``) > the defaults declared here.
    """
    # parameter priority: command line > config > default
    parser = argparse.ArgumentParser(
        description="Spatial Temporal Graph Convolution Network"
    )
    parser.add_argument(
        "--work-dir",
        default="./work_dir/temp",
        help="the work folder for storing results",
    )

    # NOTE(review): single-dash long option; argparse still maps it to
    # dest "model_saved_name".
    parser.add_argument("-model_saved_name", default="")
    parser.add_argument(
        "--config",
        default="./config/nturgbd-cross-view/test_bone.yaml",
        help="path to the configuration file",
    )

    # processor
    parser.add_argument("--phase", default="train", help="must be train or test")

    # visulize and debug
    parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="the interval for printing messages (#iteration)",
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=2,
        help="the interval for storing models (#iteration)",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=5,
        help="the interval for evaluating models (#iteration)",
    )
    parser.add_argument(
        "--print-log", type=str2bool, default=True, help="print logging or not"
    )
    parser.add_argument(
        "--show-topk",
        type=int,
        default=[1, 5],
        nargs="+",
        help="which Top K accuracy will be shown",
    )

    # feeder
    parser.add_argument(
        "--feeder", default="feeder.feeder", help="data loader will be used"
    )
    parser.add_argument(
        "--num-worker",
        type=int,
        default=32,
        help="the number of worker for data loader",
    )
    parser.add_argument(
        "--train-feeder-args",
        default=dict(),
        help="the arguments of data loader for training",
    )
    parser.add_argument(
        "--test-feeder-args",
        default=dict(),
        help="the arguments of data loader for test",
    )

    # model
    parser.add_argument("--model", default=None, help="the model will be used")
    parser.add_argument(
        "--model-args", type=dict, default=dict(), help="the arguments of model"
    )
    parser.add_argument(
        "--weights", default=None, help="the weights for network initialization"
    )
    parser.add_argument(
        "--ignore-weights",
        type=str,
        default=[],
        nargs="+",
        help="the name of weights which will be ignored in the initialization",
    )

    # optim
    parser.add_argument(
        "--base-lr", type=float, default=0.01, help="initial learning rate"
    )
    parser.add_argument(
        "--step",
        type=int,
        default=[200],
        nargs="+",
        help="the epoch where optimizer reduce the learning rate",
    )

    # training
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        nargs="+",
        help="the indexes of GPUs for training or testing",
    )
    parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
    parser.add_argument(
        "--nesterov", type=str2bool, default=False, help="use nesterov or not"
    )
    parser.add_argument(
        "--batch-size", type=int, default=256, help="training batch size"
    )
    parser.add_argument(
        "--test-batch-size", type=int, default=256, help="test batch size"
    )
    parser.add_argument(
        "--start-epoch", type=int, default=0, help="start training from which epoch"
    )
    parser.add_argument(
        "--num-epoch", type=int, default=80, help="stop training in which epoch"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
    )
    # loss
    parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
    parser.add_argument(
        "--label_count_path",
        default=None,
        type=str,
        help="Path to label counts (used in loss weighting)",
    )
    # NOTE(review): triple-dash "---beta" is almost certainly a typo for
    # "--beta". argparse strips leading dashes so the dest is still "beta",
    # but the CLI flag must be typed as "---beta". Confirm no configs/scripts
    # depend on the odd spelling before fixing.
    parser.add_argument(
        "---beta",
        type=float,
        default=0.9999,
        help="Hyperparameter for Class balanced loss",
    )
    parser.add_argument(
        "--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
    )

    # NOTE(review): these three lack type=; YAML config values keep their
    # native types but CLI-supplied values would arrive as strings.
    parser.add_argument("--only_train_part", default=False)
    parser.add_argument("--only_train_epoch", default=0)
    parser.add_argument("--warm_up_epoch", default=10)

    parser.add_argument(
        "--lambda-mil", default=1.0, help="balancing hyper-parameter of mil branch"
    )

    parser.add_argument(
        "--class-threshold",
        type=float,
        default=0.1,
        help="class threshold for rejection",
    )
    parser.add_argument(
        "--start-threshold",
        type=float,
        default=0.03,
        help="start threshold for action localization",
    )
    parser.add_argument(
        "--end-threshold",
        type=float,
        default=0.055,
        help="end threshold for action localization",
    )
    parser.add_argument(
        "--threshold-interval",
        type=float,
        default=0.005,
        help="threshold interval for action localization",
    )
    return parser
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
class Processor:
|
| 253 |
+
"""
|
| 254 |
+
Processor for Skeleton-based Action Recgnition
|
| 255 |
+
"""
|
| 256 |
+
|
| 257 |
+
def __init__(self, arg):
    """Set up work dir, tensorboard writers, model, optimizer, data and logger.

    Args:
        arg: parsed argparse namespace (see ``get_parser``).
    """
    self.arg = arg
    self.save_arg()
    if arg.phase == "train":
        if not arg.train_feeder_args["debug"]:
            if os.path.isdir(arg.model_saved_name):
                print("log_dir: ", arg.model_saved_name, "already exist")
                # answer = input('delete it? y/n:')
                # Auto-confirm: an existing log dir is wiped without prompting.
                answer = "y"
                if answer == "y":
                    print("Deleting dir...")
                    shutil.rmtree(arg.model_saved_name)
                    print("Dir removed: ", arg.model_saved_name)
                    # input('Refresh the website of tensorboard by pressing any keys')
                else:
                    print("Dir not removed: ", arg.model_saved_name)
            self.train_writer = SummaryWriter(
                os.path.join(arg.model_saved_name, "train"), "train"
            )
            self.val_writer = SummaryWriter(
                os.path.join(arg.model_saved_name, "val"), "val"
            )
        else:
            # Debug mode: one shared writer under "test".
            self.train_writer = self.val_writer = SummaryWriter(
                os.path.join(arg.model_saved_name, "test"), "test"
            )
    self.global_step = 0
    self.load_model()
    self.load_optimizer()
    self.load_data()
    self.lr = self.arg.base_lr
    self.best_acc = 0
    self.best_per_class_acc = 0
    # BCE over per-class video scores (MIL loss); named "nce" historically.
    self.loss_nce = torch.nn.BCELoss()

    # CSV-style metrics logger: step, classification mAP, detection mAP at
    # IoU 0.1..0.5, and their average.
    self.my_logger = Logger(
        os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
    )
    self.my_logger.set_names(["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)]+["avg"])
|
| 296 |
+
|
| 297 |
+
def load_data(self):
    """Instantiate the configured Feeder and build the DataLoader dict.

    Always builds a "test" loader; a shuffled, drop-last "train" loader is
    added only in train phase. Both use the multi-joint padding collate.
    """
    Feeder = import_class(self.arg.feeder)
    loaders = dict()
    if self.arg.phase == "train":
        loaders["train"] = torch.utils.data.DataLoader(
            dataset=Feeder(**self.arg.train_feeder_args),
            batch_size=self.arg.batch_size,
            shuffle=True,
            num_workers=self.arg.num_worker,
            drop_last=True,
            collate_fn=collate_with_padding_multi_joint,
        )
    loaders["test"] = torch.utils.data.DataLoader(
        dataset=Feeder(**self.arg.test_feeder_args),
        batch_size=self.arg.test_batch_size,
        shuffle=False,
        num_workers=self.arg.num_worker,
        drop_last=False,
        collate_fn=collate_with_padding_multi_joint,
    )
    self.data_loader = loaders
|
| 317 |
+
|
| 318 |
+
def load_model(self):
|
| 319 |
+
output_device = (
|
| 320 |
+
self.arg.device[0] if type(self.arg.device) is list else self.arg.device
|
| 321 |
+
)
|
| 322 |
+
self.output_device = output_device
|
| 323 |
+
Model = import_class(self.arg.model)
|
| 324 |
+
shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
|
| 325 |
+
# print(Model)
|
| 326 |
+
self.model = Model(**self.arg.model_args).cuda(output_device)
|
| 327 |
+
# print(self.model)
|
| 328 |
+
self.loss_type = arg.loss
|
| 329 |
+
|
| 330 |
+
if self.arg.weights:
|
| 331 |
+
self.print_log("Load weights from {}.".format(self.arg.weights))
|
| 332 |
+
if ".pkl" in self.arg.weights:
|
| 333 |
+
with open(self.arg.weights, "r") as f:
|
| 334 |
+
weights = pickle.load(f)
|
| 335 |
+
else:
|
| 336 |
+
weights = torch.load(self.arg.weights)
|
| 337 |
+
|
| 338 |
+
weights = OrderedDict(
|
| 339 |
+
[
|
| 340 |
+
[k.split("module.")[-1], v.cuda(output_device)]
|
| 341 |
+
for k, v in weights.items()
|
| 342 |
+
]
|
| 343 |
+
)
|
| 344 |
+
weights = remove_prefix_from_state_dict(weights, 'encoder_q.agcn.')
|
| 345 |
+
keys = list(weights.keys())
|
| 346 |
+
|
| 347 |
+
self.arg.ignore_weights = ['data_bn','fc','encoder_q','encoder_k','queue','queue_ptr','value_transform']
|
| 348 |
+
for w in self.arg.ignore_weights:
|
| 349 |
+
for key in keys:
|
| 350 |
+
if w in key:
|
| 351 |
+
if weights.pop(key, None) is not None:
|
| 352 |
+
continue
|
| 353 |
+
# self.print_log(
|
| 354 |
+
# "Sucessfully Remove Weights: {}.".format(key)
|
| 355 |
+
# )
|
| 356 |
+
# else:
|
| 357 |
+
# self.print_log("Can Not Remove Weights: {}.".format(key))
|
| 358 |
+
|
| 359 |
+
try:
|
| 360 |
+
self.model.load_state_dict(weights)
|
| 361 |
+
except:
|
| 362 |
+
state = self.model.state_dict()
|
| 363 |
+
diff = list(set(state.keys()).difference(set(weights.keys())))
|
| 364 |
+
print("Can not find these weights:")
|
| 365 |
+
for d in diff:
|
| 366 |
+
print(" " + d)
|
| 367 |
+
state.update(weights)
|
| 368 |
+
self.model.load_state_dict(state)
|
| 369 |
+
|
| 370 |
+
if type(self.arg.device) is list:
|
| 371 |
+
if len(self.arg.device) > 1:
|
| 372 |
+
self.model = nn.DataParallel(
|
| 373 |
+
self.model, device_ids=self.arg.device, output_device=output_device
|
| 374 |
+
)
|
| 375 |
+
|
| 376 |
+
def load_optimizer(self):
|
| 377 |
+
if self.arg.optimizer == "SGD":
|
| 378 |
+
self.optimizer = optim.SGD(
|
| 379 |
+
self.model.parameters(),
|
| 380 |
+
lr=self.arg.base_lr,
|
| 381 |
+
momentum=0.9,
|
| 382 |
+
nesterov=self.arg.nesterov,
|
| 383 |
+
weight_decay=self.arg.weight_decay,
|
| 384 |
+
)
|
| 385 |
+
elif self.arg.optimizer == "Adam":
|
| 386 |
+
self.optimizer = optim.Adam(
|
| 387 |
+
self.model.parameters(),
|
| 388 |
+
lr=self.arg.base_lr,
|
| 389 |
+
weight_decay=self.arg.weight_decay,
|
| 390 |
+
)
|
| 391 |
+
else:
|
| 392 |
+
raise ValueError()
|
| 393 |
+
|
| 394 |
+
def save_arg(self):
|
| 395 |
+
# save arg
|
| 396 |
+
arg_dict = vars(self.arg)
|
| 397 |
+
if not os.path.exists(self.arg.work_dir):
|
| 398 |
+
os.makedirs(self.arg.work_dir)
|
| 399 |
+
with open("{}/config.yaml".format(self.arg.work_dir), "w") as f:
|
| 400 |
+
yaml.dump(arg_dict, f)
|
| 401 |
+
|
| 402 |
+
def adjust_learning_rate(self, epoch):
|
| 403 |
+
if self.arg.optimizer == "SGD" or self.arg.optimizer == "Adam":
|
| 404 |
+
if epoch < self.arg.warm_up_epoch:
|
| 405 |
+
lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch
|
| 406 |
+
else:
|
| 407 |
+
lr = self.arg.base_lr * (
|
| 408 |
+
0.1 ** np.sum(epoch >= np.array(self.arg.step))
|
| 409 |
+
)
|
| 410 |
+
for param_group in self.optimizer.param_groups:
|
| 411 |
+
param_group["lr"] = lr
|
| 412 |
+
|
| 413 |
+
return lr
|
| 414 |
+
else:
|
| 415 |
+
raise ValueError()
|
| 416 |
+
|
| 417 |
+
def print_time(self):
|
| 418 |
+
localtime = time.asctime(time.localtime(time.time()))
|
| 419 |
+
self.print_log("Local current time : " + localtime)
|
| 420 |
+
|
| 421 |
+
    def print_log(self, str, print_time=True):
        """Print a message and, if ``arg.print_log`` is set, append it to
        ``<work_dir>/print_log.txt``.

        Args:
            str: the message text. NOTE(review): this parameter shadows the
                builtin ``str``; kept as-is to preserve the keyword-callable
                signature.
            print_time: when True, prefix the message with the local time.
        """
        if print_time:
            localtime = time.asctime(time.localtime(time.time()))
            str = "[ " + localtime + " ] " + str
        print(str)
        if self.arg.print_log:
            # Append-mode: the log file accumulates across the whole run.
            with open("{}/print_log.txt".format(self.arg.work_dir), "a") as f:
                print(str, file=f)
|
| 429 |
+
|
| 430 |
+
def record_time(self):
|
| 431 |
+
self.cur_time = time.time()
|
| 432 |
+
return self.cur_time
|
| 433 |
+
|
| 434 |
+
def split_time(self):
|
| 435 |
+
split_time = time.time() - self.cur_time
|
| 436 |
+
self.record_time()
|
| 437 |
+
return split_time
|
| 438 |
+
|
| 439 |
+
    def train(self, epoch, wb_dict, save_model=False):
        """Run one fully-supervised training epoch.

        Args:
            epoch: zero-based epoch index.
            wb_dict: metrics dict to be filled in and returned.
            save_model: when True, dump a CPU copy of the state dict afterwards.

        Returns:
            ``wb_dict`` updated with "train loss" and "train acc".
        """
        self.model.train()
        self.print_log("Training epoch: {}".format(epoch + 1))
        loader = self.data_loader["train"]
        self.adjust_learning_rate(epoch)

        loss_value, batch_acc = [], []
        self.train_writer.add_scalar("epoch", epoch, self.global_step)
        self.record_time()
        timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
        process = tqdm(loader)
        # Optionally freeze/unfreeze only the parameters whose name contains
        # "PA" depending on the epoch threshold.
        if self.arg.only_train_part:
            if epoch > self.arg.only_train_epoch:
                print("only train part, require grad")
                for key, value in self.model.named_parameters():
                    if "PA" in key:
                        value.requires_grad = True
            else:
                print("only train part, do not require grad")
                for key, value in self.model.named_parameters():
                    if "PA" in key:
                        value.requires_grad = False

        # Per-sample accumulators over the whole epoch.
        vid_preds = []
        frm_preds = []
        vid_lens = []
        labels = []

        results = []
        indexs = []

        '''
        Switch to FULL supervision
        Dataloader->Feeder -> collate_with_padding_multi_joint
        '''

        for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
            process
        ):

            self.global_step += 1
            # get data
            data = data.float().cuda(self.output_device)
            label = label.cuda(self.output_device)
            target = target.cuda(self.output_device)
            mask = mask.cuda(self.output_device)
            soft_label = soft_label.cuda(self.output_device)
            timer["dataloader"] += self.split_time()

            ''' into one hot'''
            # Flatten per-frame targets and one-hot encode them.
            # NOTE(review): num_classes=5 is hard-coded — confirm it matches
            # the dataset's class count.
            ground_truth_flat = target.view(-1)
            one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
            ''' into one hot'''

            indexs.extend(index.cpu().numpy().tolist())

            # Video-level labels extended with an extra all-ones column.
            ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

            # forward
            frm_scrs = self.model(data)


            if epoch > -1:

                frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
                # frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
                # soft_label = rearrange(soft_label, "n t c -> (n t) c")

                # loss = cls_mil_loss * 0.1 + mvl_loss(
                #     frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
                # )

                # Frame-level cross entropy against the one-hot targets.
                loss = cross_entropy_loss(
                    frm_scrs_re, one_hot_ground_truth
                ) #+ cross_entropy_loss(frm_scrs_2_re, one_hot_ground_truth)

                for i in range(data.size(0)):
                    frm_scr = frm_scrs[i]

                    label_ = label[i].cpu().numpy()
                    mask_ = mask[i].cpu().numpy()
                    # Unpadded length of this sequence.
                    vid_len = mask_.sum()

                    # Per-frame class probabilities truncated to true length.
                    frm_pred = F.softmax(frm_scr, -1).detach().cpu().numpy()[:vid_len]
                    # vid_pred = mil_pred[i].detach().cpu().numpy()

                    vid_pred = 0
                    results.append(frm_pred)

                    vid_preds.append(vid_pred)
                    frm_preds.append(frm_pred)
                    vid_lens.append(vid_len)
                    labels.append(label_)

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            loss_value.append(loss.data.item())
            timer["model"] += self.split_time()

        vid_preds = np.array(vid_preds)
        frm_preds = np.array(frm_preds)
        vid_lens = np.array(vid_lens)
        labels = np.array(labels)

        # Feed this epoch's frame predictions back to the dataset
        # (self-training style label refinement implemented by the feeder).
        loader.dataset.label_update(results, indexs)

        # cmap = cmAP(vid_preds, labels)
        # NOTE(review): classification mAP is disabled; 0 is logged instead.
        cmap = 0

        self.train_writer.add_scalar("acc", cmap, self.global_step)
        self.train_writer.add_scalar("loss", np.mean(loss_value), self.global_step)

        # statistics
        self.lr = self.optimizer.param_groups[0]["lr"]
        self.train_writer.add_scalar("lr", self.lr, self.global_step)
        timer["statistics"] += self.split_time()

        # statistics of time consumption and loss
        self.print_log("\tMean training loss: {:.4f}.".format(np.mean(loss_value)))
        self.print_log("\tAcc score: {:.3f}%".format(cmap))

        # Log
        wb_dict["train loss"] = np.mean(loss_value)
        wb_dict["train acc"] = cmap

        if save_model:
            # Save a CPU copy of the weights, stripping any DataParallel
            # "module." prefix from the keys.
            state_dict = self.model.state_dict()
            weights = OrderedDict(
                [[k.split("module.")[-1], v.cpu()] for k, v in state_dict.items()]
            )

            torch.save(
                weights,
                self.arg.model_saved_name + str(epoch) + ".pt",
            )

        return wb_dict
|
| 579 |
+
|
| 580 |
+
    @torch.no_grad()
    def eval(
        self,
        epoch,
        wb_dict,
        loader_name=["test"],
    ):
        """Evaluate on the given loaders and compute detection mAP.

        NOTE(review): ``loader_name`` uses a mutable default; harmless here
        because it is never mutated, but worth confirming before extending.

        Returns:
            ``wb_dict`` updated with "val loss" and "val acc".
        """
        self.model.eval()
        self.print_log("Eval epoch: {}".format(epoch + 1))

        # Accumulated across all loaders in loader_name.
        vid_preds = []
        frm_preds = []
        vid_lens = []
        labels = []

        for ln in loader_name:
            loss_value = []
            step = 0
            process = tqdm(self.data_loader[ln])

            for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
                process
            ):
                data = data.float().cuda(self.output_device)
                label = label.cuda(self.output_device)
                mask = mask.cuda(self.output_device)

                # Video-level labels extended with an extra all-ones column.
                ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)

                # forward
                frm_scrs = self.model(data)


                '''Loc LOSS'''
                target = target.cuda(self.output_device)
                ''' into one hot'''
                ground_truth_flat = target.view(-1)
                # NOTE(review): num_classes=5 is hard-coded, as in train().
                one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
                ''' into one hot'''
                frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
                '''Loc LOSS'''
                '''Loc LOSS'''
                # Frame-level cross entropy, mirroring the training loss.
                loss = cross_entropy_loss(
                    frm_scrs_re, one_hot_ground_truth
                )
                '''Loc LOSS'''

                loss_value.append(loss.data.item())

                for i in range(data.size(0)):
                    frm_scr = frm_scrs[i]

                    label_ = label[i].cpu().numpy()
                    mask_ = mask[i].cpu().numpy()
                    # Unpadded length of this sequence.
                    vid_len = mask_.sum()

                    # Per-frame probabilities truncated to the unpadded length.
                    frm_pred = F.softmax(frm_scr, -1).cpu().numpy()[:vid_len]
                    # vid_pred = vid_pred.cpu().numpy()

                    vid_pred = 0
                    vid_preds.append(vid_pred)
                    frm_preds.append(frm_pred)
                    vid_lens.append(vid_len)
                    labels.append(label_)

                step += 1

            vid_preds = np.array(vid_preds)
            frm_preds = np.array(frm_preds)
            vid_lens = np.array(vid_lens)
            labels = np.array(labels)

            # cmap = cmAP(vid_preds, labels)
            # NOTE(review): classification mAP is disabled; 0 is logged.
            cmap = 0
            score = cmap
            loss = np.mean(loss_value)

            # Detection mAP over a range of IoU thresholds.
            dmap, iou = dsmAP(
                vid_preds,
                frm_preds,
                vid_lens,
                self.arg.test_feeder_args["data_path"],
                self.arg,
                multi=True,
            )

            print("Classification map %f" % cmap)
            for item in list(zip(iou, dmap)):
                print("Detection map @ %f = %f" % (item[0], item[1]))

            self.my_logger.append([epoch + 1, cmap] + dmap+[np.mean(dmap)])

            wb_dict["val loss"] = loss
            wb_dict["val acc"] = score

            if score > self.best_acc:
                self.best_acc = score

            print("Acc score: ", score, " model: ", self.arg.model_saved_name)
            if self.arg.phase == "train":
                self.val_writer.add_scalar("loss", loss, self.global_step)
                self.val_writer.add_scalar("acc", score, self.global_step)

            self.print_log(
                "\tMean {} loss of {} batches: {}.".format(
                    ln, len(self.data_loader[ln]), np.mean(loss_value)
                )
            )
            self.print_log("\tAcc score: {:.3f}%".format(score))

        return wb_dict
|
| 691 |
+
|
| 692 |
+
def start(self):
|
| 693 |
+
wb_dict = {}
|
| 694 |
+
if self.arg.phase == "train":
|
| 695 |
+
self.print_log("Parameters:\n{}\n".format(str(vars(self.arg))))
|
| 696 |
+
self.global_step = (
|
| 697 |
+
self.arg.start_epoch
|
| 698 |
+
* len(self.data_loader["train"])
|
| 699 |
+
/ self.arg.batch_size
|
| 700 |
+
)
|
| 701 |
+
|
| 702 |
+
for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
|
| 703 |
+
|
| 704 |
+
save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
|
| 705 |
+
epoch + 1 == self.arg.num_epoch
|
| 706 |
+
)
|
| 707 |
+
wb_dict = {"lr": self.lr}
|
| 708 |
+
|
| 709 |
+
# Train
|
| 710 |
+
wb_dict = self.train(epoch, wb_dict, save_model=save_model)
|
| 711 |
+
if epoch%1==0:
|
| 712 |
+
# Eval. on val set
|
| 713 |
+
wb_dict = self.eval(epoch, wb_dict, loader_name=["test"])
|
| 714 |
+
# Log stats. for this epoch
|
| 715 |
+
print("Epoch: {0}\nMetrics: {1}".format(epoch, wb_dict))
|
| 716 |
+
|
| 717 |
+
print(
|
| 718 |
+
"best accuracy: ",
|
| 719 |
+
self.best_acc,
|
| 720 |
+
" model_name: ",
|
| 721 |
+
self.arg.model_saved_name,
|
| 722 |
+
)
|
| 723 |
+
|
| 724 |
+
elif self.arg.phase == "test":
|
| 725 |
+
if not self.arg.test_feeder_args["debug"]:
|
| 726 |
+
wf = self.arg.model_saved_name + "_wrong.txt"
|
| 727 |
+
rf = self.arg.model_saved_name + "_right.txt"
|
| 728 |
+
else:
|
| 729 |
+
wf = rf = None
|
| 730 |
+
if self.arg.weights is None:
|
| 731 |
+
raise ValueError("Please appoint --weights.")
|
| 732 |
+
self.arg.print_log = False
|
| 733 |
+
self.print_log("Model: {}.".format(self.arg.model))
|
| 734 |
+
self.print_log("Weights: {}.".format(self.arg.weights))
|
| 735 |
+
|
| 736 |
+
wb_dict = self.eval(
|
| 737 |
+
epoch=0,
|
| 738 |
+
wb_dict=wb_dict,
|
| 739 |
+
loader_name=["test"],
|
| 740 |
+
wrong_file=wf,
|
| 741 |
+
result_file=rf,
|
| 742 |
+
)
|
| 743 |
+
print("Inference metrics: ", wb_dict)
|
| 744 |
+
self.print_log("Done.\n")
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
def str2bool(v):
    """argparse type converter: map yes/no-style strings to bool.

    Raises:
        argparse.ArgumentTypeError: for any unrecognized value.
    """
    lowered = v.lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
| 754 |
+
|
| 755 |
+
|
| 756 |
+
def import_class(name):
    """Import and return the object at dotted path ``name`` (e.g. "pkg.mod.Cls")."""
    parts = name.split(".")
    obj = __import__(parts[0])
    for attr in parts[1:]:
        obj = getattr(obj, attr)
    return obj
|
| 762 |
+
|
| 763 |
+
|
| 764 |
+
if __name__ == "__main__":
    parser = get_parser()

    # Load args from the config file: every YAML key must correspond to a
    # known CLI argument; YAML values become new defaults, so explicit CLI
    # flags still win on the second parse below.
    p = parser.parse_args()
    if p.config is not None:
        with open(p.config, "r") as f:
            default_arg = yaml.safe_load(f)
        key = vars(p).keys()
        for k in default_arg.keys():
            if k not in key:
                print("WRONG ARG: {}".format(k))
            assert k in key
        parser.set_defaults(**default_arg)

    # Re-parse with config-derived defaults applied.
    arg = parser.parse_args()
    print("BABEL Action Recognition")
    print("Config: ", arg)
    init_seed(arg.seed)
    processor = Processor(arg)
    processor.start()
|
train_full_SSL_Unet.py
ADDED
|
@@ -0,0 +1,813 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copyright 2023 LINE Corporation
|
| 3 |
+
LINE Corporation licenses this file to you under the Apache License,
|
| 4 |
+
version 2.0 (the "License"); you may not use this file except in compliance
|
| 5 |
+
with the License. You may obtain a copy of the License at:
|
| 6 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
Unless required by applicable law or agreed to in writing, software
|
| 8 |
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| 9 |
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| 10 |
+
License for the specific language governing permissions and limitations
|
| 11 |
+
under the License.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import print_function
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import inspect
|
| 18 |
+
import os
|
| 19 |
+
import pdb
|
| 20 |
+
import pickle
|
| 21 |
+
import random
|
| 22 |
+
import re
|
| 23 |
+
import shutil
|
| 24 |
+
import time
|
| 25 |
+
from collections import *
|
| 26 |
+
|
| 27 |
+
import ipdb
|
| 28 |
+
import numpy as np
|
| 29 |
+
|
| 30 |
+
# torch
|
| 31 |
+
import torch
|
| 32 |
+
import torch.backends.cudnn as cudnn
|
| 33 |
+
import torch.nn as nn
|
| 34 |
+
import torch.nn.functional as F
|
| 35 |
+
import torch.optim as optim
|
| 36 |
+
import yaml
|
| 37 |
+
from einops import rearrange, reduce, repeat
|
| 38 |
+
from evaluation.classificationMAP import getClassificationMAP as cmAP
|
| 39 |
+
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
|
| 40 |
+
from feeders.tools import collate_with_padding_multi_joint
|
| 41 |
+
from model.losses import cross_entropy_loss, mvl_loss
|
| 42 |
+
from sklearn.metrics import f1_score
|
| 43 |
+
|
| 44 |
+
# Custom
|
| 45 |
+
from tensorboardX import SummaryWriter
|
| 46 |
+
from torch.autograd import Variable
|
| 47 |
+
from torch.optim.lr_scheduler import _LRScheduler
|
| 48 |
+
from tqdm import tqdm
|
| 49 |
+
from utils.logger import Logger
|
| 50 |
+
|
| 51 |
+
def remove_prefix_from_state_dict(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from matching keys.

    Keys that do not start with ``prefix`` are kept unchanged; insertion order
    and values are preserved.
    """
    return {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state_dict.items()
    }
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def init_seed(seed):
    """Seed every RNG in play (random, numpy, torch CPU/GPU) and pin cuDNN."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Prefer deterministic cuDNN kernels over autotuned (benchmark) ones so
    # runs are reproducible.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # torch.use_deterministic_algorithms(True)
|
| 70 |
+
|
| 71 |
+
def get_parser():
    """Build the CLI argument parser.

    Parameter priority is: command line > config file > defaults (the config
    file is folded in by __main__ via ``parser.set_defaults``).
    """
    parser = argparse.ArgumentParser(
        description="Spatial Temporal Graph Convolution Network"
    )
    parser.add_argument(
        "--work-dir",
        default="./work_dir/temp",
        help="the work folder for storing results",
    )

    parser.add_argument("-model_saved_name", default="")
    parser.add_argument(
        "--config",
        default="./config/nturgbd-cross-view/test_bone.yaml",
        help="path to the configuration file",
    )

    # processor
    parser.add_argument("--phase", default="train", help="must be train or test")

    # visualize and debug
    parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="the interval for printing messages (#iteration)",
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=2,
        help="the interval for storing models (#iteration)",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=5,
        help="the interval for evaluating models (#iteration)",
    )
    parser.add_argument(
        "--print-log", type=str2bool, default=True, help="print logging or not"
    )
    parser.add_argument(
        "--show-topk",
        type=int,
        default=[1, 5],
        nargs="+",
        help="which Top K accuracy will be shown",
    )

    # feeder
    parser.add_argument(
        "--feeder", default="feeder.feeder", help="data loader will be used"
    )
    parser.add_argument(
        "--num-worker",
        type=int,
        default=32,
        help="the number of worker for data loader",
    )
    parser.add_argument(
        "--train-feeder-args",
        default=dict(),
        help="the arguments of data loader for training",
    )
    parser.add_argument(
        "--test-feeder-args",
        default=dict(),
        help="the arguments of data loader for test",
    )

    # model
    parser.add_argument("--model", default=None, help="the model will be used")
    parser.add_argument(
        "--model-args", type=dict, default=dict(), help="the arguments of model"
    )
    parser.add_argument(
        "--weights", default=None, help="the weights for network initialization"
    )
    parser.add_argument(
        "--ignore-weights",
        type=str,
        default=[],
        nargs="+",
        help="the name of weights which will be ignored in the initialization",
    )

    # optim
    parser.add_argument(
        "--base-lr", type=float, default=0.01, help="initial learning rate"
    )
    parser.add_argument(
        "--step",
        type=int,
        default=[200],
        nargs="+",
        help="the epoch where optimizer reduce the learning rate",
    )

    # training
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        nargs="+",
        help="the indexes of GPUs for training or testing",
    )
    parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
    parser.add_argument(
        "--nesterov", type=str2bool, default=False, help="use nesterov or not"
    )
    parser.add_argument(
        "--batch-size", type=int, default=256, help="training batch size"
    )
    parser.add_argument(
        "--test-batch-size", type=int, default=256, help="test batch size"
    )
    parser.add_argument(
        "--start-epoch", type=int, default=0, help="start training from which epoch"
    )
    parser.add_argument(
        "--num-epoch", type=int, default=80, help="stop training in which epoch"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
    )
    # loss
    parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
    parser.add_argument(
        "--label_count_path",
        default=None,
        type=str,
        help="Path to label counts (used in loss weighting)",
    )
    # BUGFIX: was "---beta" (three dashes). argparse derived dest "_beta" from
    # it, so a YAML config key "beta" failed the __main__ key check and
    # ``arg.beta`` never existed.
    parser.add_argument(
        "--beta",
        type=float,
        default=0.9999,
        help="Hyperparameter for Class balanced loss",
    )
    parser.add_argument(
        "--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
    )

    parser.add_argument("--only_train_part", default=False)
    parser.add_argument("--only_train_epoch", default=0)
    parser.add_argument("--warm_up_epoch", default=10)

    parser.add_argument(
        "--lambda-mil", default=1.0, help="balancing hyper-parameter of mil branch"
    )

    parser.add_argument(
        "--class-threshold",
        type=float,
        default=0.1,
        help="class threshold for rejection",
    )
    parser.add_argument(
        "--start-threshold",
        type=float,
        default=0.03,
        help="start threshold for action localization",
    )
    parser.add_argument(
        "--end-threshold",
        type=float,
        default=0.055,
        help="end threshold for action localization",
    )
    parser.add_argument(
        "--threshold-interval",
        type=float,
        default=0.005,
        help="threshold interval for action localization",
    )
    return parser
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
class Processor:
|
| 253 |
+
"""
|
| 254 |
+
Processor for Skeleton-based Action Recgnition
|
| 255 |
+
"""
|
| 256 |
+
|
| 257 |
+
    def __init__(self, arg):
        """Set up the experiment: log dirs/writers, model, optimizer, data, logger.

        Args:
            arg: parsed argparse namespace (see ``get_parser``).
        """
        self.arg = arg
        self.save_arg()
        if arg.phase == "train":
            if not arg.train_feeder_args["debug"]:
                if os.path.isdir(arg.model_saved_name):
                    print("log_dir: ", arg.model_saved_name, "already exist")
                    # answer = input('delete it? y/n:')
                    # NOTE(review): a previous run's log dir is deleted
                    # unconditionally — the interactive prompt is bypassed.
                    answer = "y"
                    if answer == "y":
                        print("Deleting dir...")
                        shutil.rmtree(arg.model_saved_name)
                        print("Dir removed: ", arg.model_saved_name)
                        # input('Refresh the website of tensorboard by pressing any keys')
                    else:
                        print("Dir not removed: ", arg.model_saved_name)
                self.train_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "train"), "train"
                )
                self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "val"), "val"
                )
            else:
                # Debug mode: a single writer serves both train and val curves.
                self.train_writer = self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "test"), "test"
                )
        self.global_step = 0
        self.load_model()
        self.load_optimizer()
        self.load_data()
        self.lr = self.arg.base_lr
        self.best_acc = 0
        self.best_per_class_acc = 0
        self.loss_nce = torch.nn.BCELoss()

        self.my_logger = Logger(
            os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
        )
        # Columns: step, classification mAP, detection mAP@0.1..0.5, average.
        self.my_logger.set_names(["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)] + ['avg'])
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def load_data(self):
|
| 301 |
+
|
| 302 |
+
seed = self.arg.seed if hasattr(self.arg, "seed") else 42
|
| 303 |
+
|
| 304 |
+
def seed_worker(worker_id):
|
| 305 |
+
worker_seed = seed + worker_id
|
| 306 |
+
np.random.seed(worker_seed)
|
| 307 |
+
random.seed(worker_seed)
|
| 308 |
+
|
| 309 |
+
g = torch.Generator()
|
| 310 |
+
g.manual_seed(seed)
|
| 311 |
+
|
| 312 |
+
Feeder = import_class(self.arg.feeder)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
self.data_loader = dict()
|
| 316 |
+
if self.arg.phase == "train":
|
| 317 |
+
self.data_loader["train"] = torch.utils.data.DataLoader(
|
| 318 |
+
dataset=Feeder(**self.arg.train_feeder_args),
|
| 319 |
+
batch_size=self.arg.batch_size,
|
| 320 |
+
shuffle=True,
|
| 321 |
+
num_workers=self.arg.num_worker,
|
| 322 |
+
drop_last=True,
|
| 323 |
+
collate_fn=collate_with_padding_multi_joint,
|
| 324 |
+
worker_init_fn=seed_worker, # ✅ 固定每个worker的seed
|
| 325 |
+
generator=g
|
| 326 |
+
)
|
| 327 |
+
self.data_loader["test"] = torch.utils.data.DataLoader(
|
| 328 |
+
dataset=Feeder(**self.arg.test_feeder_args),
|
| 329 |
+
batch_size=self.arg.test_batch_size,
|
| 330 |
+
shuffle=False,
|
| 331 |
+
num_workers=self.arg.num_worker,
|
| 332 |
+
drop_last=False,
|
| 333 |
+
collate_fn=collate_with_padding_multi_joint,
|
| 334 |
+
worker_init_fn=seed_worker, # ✅ 固定每个worker的seed
|
| 335 |
+
generator=g
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
+
def load_model(self):
|
| 339 |
+
output_device = (
|
| 340 |
+
self.arg.device[0] if type(self.arg.device) is list else self.arg.device
|
| 341 |
+
)
|
| 342 |
+
self.output_device = output_device
|
| 343 |
+
Model = import_class(self.arg.model)
|
| 344 |
+
shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
|
| 345 |
+
# print(Model)
|
| 346 |
+
self.model = Model(**self.arg.model_args).cuda(output_device)
|
| 347 |
+
# print(self.model)
|
| 348 |
+
self.loss_type = arg.loss
|
| 349 |
+
|
| 350 |
+
if self.arg.weights:
|
| 351 |
+
# if False:
|
| 352 |
+
# self.global_step = int(arg.weights[:-3].split("-")[-1])
|
| 353 |
+
self.print_log("Load weights from {}.".format(self.arg.weights))
|
| 354 |
+
if ".pkl" in self.arg.weights:
|
| 355 |
+
with open(self.arg.weights, "r") as f:
|
| 356 |
+
weights = pickle.load(f)
|
| 357 |
+
else:
|
| 358 |
+
weights = torch.load(self.arg.weights)
|
| 359 |
+
|
| 360 |
+
weights = OrderedDict(
|
| 361 |
+
[
|
| 362 |
+
[k.split("module.")[-1], v.cuda(output_device)]
|
| 363 |
+
for k, v in weights.items()
|
| 364 |
+
]
|
| 365 |
+
)
|
| 366 |
+
weights = remove_prefix_from_state_dict(weights, 'encoder_q.agcn.')
|
| 367 |
+
keys = list(weights.keys())
|
| 368 |
+
self.arg.ignore_weights = ['data_bn','fc','encoder_q','encoder_k','queue','queue_ptr','value_transform']
|
| 369 |
+
for w in self.arg.ignore_weights:
|
| 370 |
+
for key in keys:
|
| 371 |
+
if w in key:
|
| 372 |
+
if weights.pop(key, None) is not None:
|
| 373 |
+
self.print_log(
|
| 374 |
+
"Sucessfully Remove Weights: {}.".format(key)
|
| 375 |
+
)
|
| 376 |
+
else:
|
| 377 |
+
self.print_log("Can Not Remove Weights: {}.".format(key))
|
| 378 |
+
|
| 379 |
+
try:
|
| 380 |
+
self.model.load_state_dict(weights)
|
| 381 |
+
except:
|
| 382 |
+
state = self.model.state_dict()
|
| 383 |
+
diff = list(set(state.keys()).difference(set(weights.keys())))
|
| 384 |
+
print("Can not find these weights:")
|
| 385 |
+
for d in diff:
|
| 386 |
+
print(" " + d)
|
| 387 |
+
state.update(weights)
|
| 388 |
+
self.model.load_state_dict(state)
|
| 389 |
+
|
| 390 |
+
if type(self.arg.device) is list:
|
| 391 |
+
if len(self.arg.device) > 1:
|
| 392 |
+
self.model = nn.DataParallel(
|
| 393 |
+
self.model, device_ids=self.arg.device, output_device=output_device
|
| 394 |
+
)
|
| 395 |
+
|
| 396 |
+
def load_optimizer(self):
|
| 397 |
+
if self.arg.optimizer == "SGD":
|
| 398 |
+
self.optimizer = optim.SGD(
|
| 399 |
+
self.model.parameters(),
|
| 400 |
+
lr=self.arg.base_lr,
|
| 401 |
+
momentum=0.9,
|
| 402 |
+
nesterov=self.arg.nesterov,
|
| 403 |
+
weight_decay=self.arg.weight_decay,
|
| 404 |
+
)
|
| 405 |
+
elif self.arg.optimizer == "Adam":
|
| 406 |
+
self.optimizer = optim.Adam(
|
| 407 |
+
self.model.parameters(),
|
| 408 |
+
lr=self.arg.base_lr,
|
| 409 |
+
weight_decay=self.arg.weight_decay,
|
| 410 |
+
)
|
| 411 |
+
else:
|
| 412 |
+
raise ValueError()
|
| 413 |
+
|
| 414 |
+
def save_arg(self):
|
| 415 |
+
# save arg
|
| 416 |
+
arg_dict = vars(self.arg)
|
| 417 |
+
if not os.path.exists(self.arg.work_dir):
|
| 418 |
+
os.makedirs(self.arg.work_dir)
|
| 419 |
+
with open("{}/config.yaml".format(self.arg.work_dir), "w") as f:
|
| 420 |
+
yaml.dump(arg_dict, f)
|
| 421 |
+
|
| 422 |
+
def adjust_learning_rate(self, epoch):
|
| 423 |
+
if self.arg.optimizer == "SGD" or self.arg.optimizer == "Adam":
|
| 424 |
+
if epoch < self.arg.warm_up_epoch:
|
| 425 |
+
lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch
|
| 426 |
+
else:
|
| 427 |
+
lr = self.arg.base_lr * (
|
| 428 |
+
0.1 ** np.sum(epoch >= np.array(self.arg.step))
|
| 429 |
+
)
|
| 430 |
+
for param_group in self.optimizer.param_groups:
|
| 431 |
+
param_group["lr"] = lr
|
| 432 |
+
|
| 433 |
+
return lr
|
| 434 |
+
else:
|
| 435 |
+
raise ValueError()
|
| 436 |
+
|
| 437 |
+
def print_time(self):
|
| 438 |
+
localtime = time.asctime(time.localtime(time.time()))
|
| 439 |
+
self.print_log("Local current time : " + localtime)
|
| 440 |
+
|
| 441 |
+
def print_log(self, str, print_time=True):
|
| 442 |
+
if print_time:
|
| 443 |
+
localtime = time.asctime(time.localtime(time.time()))
|
| 444 |
+
str = "[ " + localtime + " ] " + str
|
| 445 |
+
print(str)
|
| 446 |
+
if self.arg.print_log:
|
| 447 |
+
with open("{}/print_log.txt".format(self.arg.work_dir), "a") as f:
|
| 448 |
+
print(str, file=f)
|
| 449 |
+
|
| 450 |
+
def record_time(self):
|
| 451 |
+
self.cur_time = time.time()
|
| 452 |
+
return self.cur_time
|
| 453 |
+
|
| 454 |
+
def split_time(self):
|
| 455 |
+
split_time = time.time() - self.cur_time
|
| 456 |
+
self.record_time()
|
| 457 |
+
return split_time
|
| 458 |
+
|
| 459 |
+
def train(self, epoch, wb_dict, save_model=False):
|
| 460 |
+
self.model.train()
|
| 461 |
+
self.print_log("Training epoch: {}".format(epoch + 1))
|
| 462 |
+
loader = self.data_loader["train"]
|
| 463 |
+
self.adjust_learning_rate(epoch)
|
| 464 |
+
|
| 465 |
+
loss_value, batch_acc = [], []
|
| 466 |
+
self.train_writer.add_scalar("epoch", epoch, self.global_step)
|
| 467 |
+
self.record_time()
|
| 468 |
+
timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
|
| 469 |
+
process = tqdm(loader)
|
| 470 |
+
if self.arg.only_train_part:
|
| 471 |
+
if epoch > self.arg.only_train_epoch:
|
| 472 |
+
print("only train part, require grad")
|
| 473 |
+
for key, value in self.model.named_parameters():
|
| 474 |
+
if "PA" in key:
|
| 475 |
+
value.requires_grad = True
|
| 476 |
+
else:
|
| 477 |
+
print("only train part, do not require grad")
|
| 478 |
+
for key, value in self.model.named_parameters():
|
| 479 |
+
if "PA" in key:
|
| 480 |
+
value.requires_grad = False
|
| 481 |
+
|
| 482 |
+
vid_preds = []
|
| 483 |
+
frm_preds = []
|
| 484 |
+
vid_lens = []
|
| 485 |
+
labels = []
|
| 486 |
+
|
| 487 |
+
results = []
|
| 488 |
+
indexs = []
|
| 489 |
+
|
| 490 |
+
'''
|
| 491 |
+
Switch to FULL supervision
|
| 492 |
+
Dataloader->Feeder -> collate_with_padding_multi_joint
|
| 493 |
+
'''
|
| 494 |
+
|
| 495 |
+
for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
|
| 496 |
+
process
|
| 497 |
+
):
|
| 498 |
+
|
| 499 |
+
self.global_step += 1
|
| 500 |
+
# get data
|
| 501 |
+
data = data.float().cuda(self.output_device)
|
| 502 |
+
label = label.cuda(self.output_device)
|
| 503 |
+
target = target.cuda(self.output_device)
|
| 504 |
+
mask = mask.cuda(self.output_device)
|
| 505 |
+
soft_label = soft_label.cuda(self.output_device)
|
| 506 |
+
timer["dataloader"] += self.split_time()
|
| 507 |
+
|
| 508 |
+
''' into one hot'''
|
| 509 |
+
ground_truth_flat = target.view(-1)
|
| 510 |
+
one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
|
| 511 |
+
''' into one hot'''
|
| 512 |
+
|
| 513 |
+
indexs.extend(index.cpu().numpy().tolist())
|
| 514 |
+
|
| 515 |
+
ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)
|
| 516 |
+
|
| 517 |
+
# forward
|
| 518 |
+
# print(data.shape)
|
| 519 |
+
mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)
|
| 520 |
+
|
| 521 |
+
cls_mil_loss = self.loss_nce(mil_pred.float(), ab_labels.float()) + self.loss_nce(
|
| 522 |
+
mil_pred_2.float(), ab_labels.float()
|
| 523 |
+
)
|
| 524 |
+
|
| 525 |
+
if epoch > -1:
|
| 526 |
+
|
| 527 |
+
frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
|
| 528 |
+
frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
|
| 529 |
+
soft_label = rearrange(soft_label, "n t c -> (n t) c")
|
| 530 |
+
|
| 531 |
+
loss = cls_mil_loss * 0.1 + mvl_loss(
|
| 532 |
+
frm_scrs, frm_scrs_2, rate=0.2, weight=0.5
|
| 533 |
+
)
|
| 534 |
+
|
| 535 |
+
loss += cross_entropy_loss(
|
| 536 |
+
frm_scrs_re, one_hot_ground_truth
|
| 537 |
+
) + cross_entropy_loss(frm_scrs_2_re, one_hot_ground_truth)
|
| 538 |
+
|
| 539 |
+
for i in range(data.size(0)):
|
| 540 |
+
frm_scr = frm_scrs[i]
|
| 541 |
+
|
| 542 |
+
label_ = label[i].cpu().numpy()
|
| 543 |
+
mask_ = mask[i].cpu().numpy()
|
| 544 |
+
vid_len = mask_.sum()
|
| 545 |
+
|
| 546 |
+
frm_pred = F.softmax(frm_scr, -1).detach().cpu().numpy()[:vid_len]
|
| 547 |
+
vid_pred = mil_pred[i].detach().cpu().numpy()
|
| 548 |
+
|
| 549 |
+
results.append(frm_pred)
|
| 550 |
+
|
| 551 |
+
vid_preds.append(vid_pred)
|
| 552 |
+
frm_preds.append(frm_pred)
|
| 553 |
+
vid_lens.append(vid_len)
|
| 554 |
+
labels.append(label_)
|
| 555 |
+
|
| 556 |
+
# backward
|
| 557 |
+
self.optimizer.zero_grad()
|
| 558 |
+
loss.backward()
|
| 559 |
+
self.optimizer.step()
|
| 560 |
+
|
| 561 |
+
loss_value.append(loss.data.item())
|
| 562 |
+
timer["model"] += self.split_time()
|
| 563 |
+
|
| 564 |
+
vid_preds = np.array(vid_preds)
|
| 565 |
+
frm_preds = np.array(frm_preds)
|
| 566 |
+
vid_lens = np.array(vid_lens)
|
| 567 |
+
labels = np.array(labels)
|
| 568 |
+
|
| 569 |
+
loader.dataset.label_update(results, indexs)
|
| 570 |
+
|
| 571 |
+
cmap = cmAP(vid_preds, labels)
|
| 572 |
+
|
| 573 |
+
self.train_writer.add_scalar("acc", cmap, self.global_step)
|
| 574 |
+
self.train_writer.add_scalar("loss", np.mean(loss_value), self.global_step)
|
| 575 |
+
|
| 576 |
+
# statistics
|
| 577 |
+
self.lr = self.optimizer.param_groups[0]["lr"]
|
| 578 |
+
self.train_writer.add_scalar("lr", self.lr, self.global_step)
|
| 579 |
+
timer["statistics"] += self.split_time()
|
| 580 |
+
|
| 581 |
+
# statistics of time consumption and loss
|
| 582 |
+
self.print_log("\tMean training loss: {:.4f}.".format(np.mean(loss_value)))
|
| 583 |
+
self.print_log("\tAcc score: {:.3f}%".format(cmap))
|
| 584 |
+
|
| 585 |
+
# Log
|
| 586 |
+
wb_dict["train loss"] = np.mean(loss_value)
|
| 587 |
+
wb_dict["train acc"] = cmap
|
| 588 |
+
|
| 589 |
+
if save_model:
|
| 590 |
+
state_dict = self.model.state_dict()
|
| 591 |
+
weights = OrderedDict(
|
| 592 |
+
[[k.split("module.")[-1], v.cpu()] for k, v in state_dict.items()]
|
| 593 |
+
)
|
| 594 |
+
|
| 595 |
+
torch.save(
|
| 596 |
+
weights,
|
| 597 |
+
self.arg.model_saved_name + str(epoch) + ".pt",
|
| 598 |
+
)
|
| 599 |
+
|
| 600 |
+
return wb_dict
|
| 601 |
+
|
| 602 |
+
@torch.no_grad()
|
| 603 |
+
def eval(
|
| 604 |
+
self,
|
| 605 |
+
epoch,
|
| 606 |
+
wb_dict,
|
| 607 |
+
loader_name=["test"],
|
| 608 |
+
):
|
| 609 |
+
self.model.eval()
|
| 610 |
+
self.print_log("Eval epoch: {}".format(epoch + 1))
|
| 611 |
+
|
| 612 |
+
vid_preds = []
|
| 613 |
+
frm_preds = []
|
| 614 |
+
vid_lens = []
|
| 615 |
+
labels = []
|
| 616 |
+
|
| 617 |
+
for ln in loader_name:
|
| 618 |
+
loss_value = []
|
| 619 |
+
step = 0
|
| 620 |
+
process = tqdm(self.data_loader[ln])
|
| 621 |
+
|
| 622 |
+
for batch_idx, (data, label, target, mask, index, soft_label) in enumerate(
|
| 623 |
+
process
|
| 624 |
+
):
|
| 625 |
+
data = data.float().cuda(self.output_device)
|
| 626 |
+
label = label.cuda(self.output_device)
|
| 627 |
+
mask = mask.cuda(self.output_device)
|
| 628 |
+
|
| 629 |
+
ab_labels = torch.cat([label, torch.ones(label.size(0), 1).cuda()], -1)
|
| 630 |
+
# print(data.shape)
|
| 631 |
+
# forward
|
| 632 |
+
mil_pred, frm_scrs, mil_pred_2, frm_scrs_2 = self.model(data,mask)
|
| 633 |
+
|
| 634 |
+
# cls_mil_loss = self.loss_nce(
|
| 635 |
+
# mil_pred, ab_labels.float()
|
| 636 |
+
# ) + self.loss_nce(mil_pred_2, ab_labels.float())
|
| 637 |
+
|
| 638 |
+
# loss_co = mvl_loss(frm_scrs, frm_scrs_2, rate=0.2, weight=0.5)
|
| 639 |
+
|
| 640 |
+
# loss = cls_mil_loss * self.arg.lambda_mil + loss_co
|
| 641 |
+
'''Loc LOSS'''
|
| 642 |
+
target = target.cuda(self.output_device)
|
| 643 |
+
''' into one hot'''
|
| 644 |
+
ground_truth_flat = target.view(-1)
|
| 645 |
+
one_hot_ground_truth = F.one_hot(ground_truth_flat, num_classes=5)
|
| 646 |
+
''' into one hot'''
|
| 647 |
+
frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
|
| 648 |
+
frm_scrs_2_re = rearrange(frm_scrs_2, "n t c -> (n t) c")
|
| 649 |
+
'''Loc LOSS'''
|
| 650 |
+
'''Loc LOSS'''
|
| 651 |
+
loss = cross_entropy_loss(
|
| 652 |
+
frm_scrs_re, one_hot_ground_truth
|
| 653 |
+
) + cross_entropy_loss(frm_scrs_2_re, one_hot_ground_truth)
|
| 654 |
+
'''Loc LOSS'''
|
| 655 |
+
|
| 656 |
+
loss_value.append(loss.data.item())
|
| 657 |
+
|
| 658 |
+
for i in range(data.size(0)):
|
| 659 |
+
frm_scr = frm_scrs[i]
|
| 660 |
+
vid_pred = mil_pred[i]
|
| 661 |
+
|
| 662 |
+
label_ = label[i].cpu().numpy()
|
| 663 |
+
mask_ = mask[i].cpu().numpy()
|
| 664 |
+
vid_len = mask_.sum()
|
| 665 |
+
|
| 666 |
+
frm_pred = F.softmax(frm_scr, -1).cpu().numpy()[:vid_len]
|
| 667 |
+
vid_pred = vid_pred.cpu().numpy()
|
| 668 |
+
|
| 669 |
+
vid_preds.append(vid_pred)
|
| 670 |
+
frm_preds.append(frm_pred)
|
| 671 |
+
vid_lens.append(vid_len)
|
| 672 |
+
labels.append(label_)
|
| 673 |
+
|
| 674 |
+
step += 1
|
| 675 |
+
|
| 676 |
+
vid_preds = np.array(vid_preds)
|
| 677 |
+
frm_preds = np.array(frm_preds)
|
| 678 |
+
vid_lens = np.array(vid_lens)
|
| 679 |
+
labels = np.array(labels)
|
| 680 |
+
|
| 681 |
+
cmap = cmAP(vid_preds, labels)
|
| 682 |
+
|
| 683 |
+
score = cmap
|
| 684 |
+
loss = np.mean(loss_value)
|
| 685 |
+
|
| 686 |
+
dmap, iou = dsmAP(
|
| 687 |
+
vid_preds,
|
| 688 |
+
frm_preds,
|
| 689 |
+
vid_lens,
|
| 690 |
+
self.arg.test_feeder_args["data_path"],
|
| 691 |
+
self.arg,
|
| 692 |
+
multi=True,
|
| 693 |
+
)
|
| 694 |
+
|
| 695 |
+
print("Classification map %f" % cmap)
|
| 696 |
+
for item in list(zip(iou, dmap)):
|
| 697 |
+
print("Detection map @ %f = %f" % (item[0], item[1]))
|
| 698 |
+
|
| 699 |
+
self.my_logger.append([epoch + 1, cmap] + dmap+ [np.mean(dmap)])
|
| 700 |
+
|
| 701 |
+
wb_dict["val loss"] = loss
|
| 702 |
+
wb_dict["val acc"] = score
|
| 703 |
+
|
| 704 |
+
if score > self.best_acc:
|
| 705 |
+
self.best_acc = score
|
| 706 |
+
|
| 707 |
+
print("Acc score: ", score, " model: ", self.arg.model_saved_name)
|
| 708 |
+
if self.arg.phase == "train":
|
| 709 |
+
self.val_writer.add_scalar("loss", loss, self.global_step)
|
| 710 |
+
self.val_writer.add_scalar("acc", score, self.global_step)
|
| 711 |
+
|
| 712 |
+
self.print_log(
|
| 713 |
+
"\tMean {} loss of {} batches: {}.".format(
|
| 714 |
+
ln, len(self.data_loader[ln]), np.mean(loss_value)
|
| 715 |
+
)
|
| 716 |
+
)
|
| 717 |
+
self.print_log("\tAcc score: {:.3f}%".format(score))
|
| 718 |
+
|
| 719 |
+
return wb_dict
|
| 720 |
+
|
| 721 |
+
def start(self):
|
| 722 |
+
wb_dict = {}
|
| 723 |
+
if self.arg.phase == "train":
|
| 724 |
+
self.print_log("Parameters:\n{}\n".format(str(vars(self.arg))))
|
| 725 |
+
self.global_step = (
|
| 726 |
+
self.arg.start_epoch
|
| 727 |
+
* len(self.data_loader["train"])
|
| 728 |
+
/ self.arg.batch_size
|
| 729 |
+
)
|
| 730 |
+
|
| 731 |
+
for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
|
| 732 |
+
|
| 733 |
+
save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
|
| 734 |
+
epoch + 1 == self.arg.num_epoch
|
| 735 |
+
)
|
| 736 |
+
wb_dict = {"lr": self.lr}
|
| 737 |
+
|
| 738 |
+
# Train
|
| 739 |
+
wb_dict = self.train(epoch, wb_dict, save_model=save_model)
|
| 740 |
+
if epoch%5==0:
|
| 741 |
+
# Eval. on val set
|
| 742 |
+
wb_dict = self.eval(epoch, wb_dict, loader_name=["test"])
|
| 743 |
+
# Log stats. for this epoch
|
| 744 |
+
print("Epoch: {0}\nMetrics: {1}".format(epoch, wb_dict))
|
| 745 |
+
|
| 746 |
+
print(
|
| 747 |
+
"best accuracy: ",
|
| 748 |
+
self.best_acc,
|
| 749 |
+
" model_name: ",
|
| 750 |
+
self.arg.model_saved_name,
|
| 751 |
+
)
|
| 752 |
+
|
| 753 |
+
elif self.arg.phase == "test":
|
| 754 |
+
if not self.arg.test_feeder_args["debug"]:
|
| 755 |
+
wf = self.arg.model_saved_name + "_wrong.txt"
|
| 756 |
+
rf = self.arg.model_saved_name + "_right.txt"
|
| 757 |
+
else:
|
| 758 |
+
wf = rf = None
|
| 759 |
+
if self.arg.weights is None:
|
| 760 |
+
raise ValueError("Please appoint --weights.")
|
| 761 |
+
self.arg.print_log = False
|
| 762 |
+
self.print_log("Model: {}.".format(self.arg.model))
|
| 763 |
+
self.print_log("Weights: {}.".format(self.arg.weights))
|
| 764 |
+
|
| 765 |
+
wb_dict = self.eval(
|
| 766 |
+
epoch=0,
|
| 767 |
+
wb_dict=wb_dict,
|
| 768 |
+
loader_name=["test"],
|
| 769 |
+
wrong_file=wf,
|
| 770 |
+
result_file=rf,
|
| 771 |
+
)
|
| 772 |
+
print("Inference metrics: ", wb_dict)
|
| 773 |
+
self.print_log("Done.\n")
|
| 774 |
+
|
| 775 |
+
|
| 776 |
+
def str2bool(v):
    """Parse a human-friendly boolean flag value for argparse.

    Raises:
        argparse.ArgumentTypeError: if *v* is not a recognised boolean word.
    """
    lowered = v.lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
def import_class(name):
    """Resolve a dotted path such as ``feeders.feeder.Feeder`` to the object."""
    module_name, *attrs = name.split(".")
    obj = __import__(module_name)
    for attr in attrs:
        obj = getattr(obj, attr)
    return obj
if __name__ == "__main__":
    parser = get_parser()

    # First parse: pick up --config, then overlay the YAML file's values as
    # argparse defaults so the command line still wins.
    p = parser.parse_args()
    if p.config is not None:
        with open(p.config, "r") as f:
            default_arg = yaml.safe_load(f)
        known = vars(p).keys()
        for k in default_arg.keys():
            if k not in known:
                print("WRONG ARG: {}".format(k))
            assert k in known
        parser.set_defaults(**default_arg)

    arg = parser.parse_args()
    print("BABEL Action Recognition")
    print("Config: ", arg)
    init_seed(arg.seed)
    processor = Processor(arg)
    processor.start()
utils/__init__.py
ADDED
|
File without changes
|
utils/logger.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# A simple torch style logger
|
| 2 |
+
# (C) Wei YANG 2017
|
| 3 |
+
from __future__ import absolute_import
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
__all__ = ["Logger", "LoggerMonitor", "savefig"]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def savefig(fname, dpi=None):
    """Save the current matplotlib figure to *fname* (default 150 dpi)."""
    # `is None` rather than `== None`: identity test per PEP 8.
    dpi = 150 if dpi is None else dpi
    plt.savefig(fname, dpi=dpi)
def plot_overlap(logger, names=None):
    """Plot each named series of *logger* onto the current axes.

    Returns:
        Legend labels of the form "<title>(<name>)", one per plotted series.
    """
    # `is None` rather than `== None`: identity test per PEP 8.
    names = logger.names if names is None else names
    numbers = logger.numbers
    for _, name in enumerate(names):
        x = np.arange(len(numbers[name]))
        plt.plot(x, np.asarray(numbers[name]))
    return [logger.title + "(" + name + ")" for name in names]
class Logger(object):
    """Save the training process to a tab-separated log file, with a simple
    plot helper.

    Fixes vs. the original:
    - ``is None`` instead of ``== None`` comparisons,
    - the dead ``if self.resume: pass`` branch in ``set_names`` is removed,
    - resumed rows are parsed back to ``float`` so that ``plot`` works on a
      resumed history (the original stored raw strings).
    """

    def __init__(self, fpath, title=None, resume=False):
        self.file = None
        self.resume = resume
        self.title = "" if title is None else title
        if fpath is not None:
            if resume:
                # Re-read the existing header and data rows, then append.
                self.file = open(fpath, "r")
                name = self.file.readline()
                self.names = name.rstrip().split("\t")
                self.numbers = {}
                for name in self.names:
                    self.numbers[name] = []

                for numbers in self.file:
                    numbers = numbers.rstrip().split("\t")
                    for i in range(0, len(numbers)):
                        # Parse to float so resumed series are numeric.
                        self.numbers[self.names[i]].append(float(numbers[i]))
                self.file.close()
                self.file = open(fpath, "a")
            else:
                self.file = open(fpath, "w")

    def set_names(self, names):
        """Write the tab-separated header row and initialize one series per name."""
        self.numbers = {}
        self.names = names
        for name in self.names:
            self.file.write(name)
            self.file.write("\t")
            self.numbers[name] = []
        self.file.write("\n")
        self.file.flush()

    def append(self, numbers):
        """Append one row of values (must match the header length)."""
        assert len(self.names) == len(numbers), "Numbers do not match names"
        for index, num in enumerate(numbers):
            self.file.write("{0:.6f}".format(num))
            self.file.write("\t")
            self.numbers[self.names[index]].append(num)
        self.file.write("\n")
        self.file.flush()

    def plot(self, names=None):
        """Plot the selected (default: all) series with a legend and grid."""
        names = self.names if names is None else names
        numbers = self.numbers
        for _, name in enumerate(names):
            x = np.arange(len(numbers[name]))
            plt.plot(x, np.asarray(numbers[name]))
        plt.legend([self.title + "(" + name + ")" for name in names])
        plt.grid(True)

    def close(self):
        """Close the underlying file handle, if any."""
        if self.file is not None:
            self.file.close()
class LoggerMonitor(object):
    """Load and visualize multiple saved logs on a shared figure."""

    def __init__(self, paths):
        """*paths* is a dictionary of {title: filepath} pairs."""
        self.loggers = [
            Logger(path, title=title, resume=True)
            for title, path in paths.items()
        ]

    def plot(self, names=None):
        """Overlay the selected series from every loaded logger."""
        plt.figure()
        plt.subplot(121)
        legend_text = []
        for logger in self.loggers:
            legend_text += plot_overlap(logger, names)
        plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
        plt.grid(True)
if __name__ == "__main__":
    # # Example
    # logger = Logger('test.txt')
    # logger.set_names(['Train loss', 'Valid loss','Test loss'])

    # length = 100
    # t = np.arange(length)
    # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1
    # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1
    # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1

    # for i in range(0, length):
    #     logger.append([train_loss[i], valid_loss[i], test_loss[i]])
    # logger.plot()

    # Example: monitor several saved logs and plot one shared field.
    paths = {
        "resadvnet20": "/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet20/log.txt",
        "resadvnet32": "/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet32/log.txt",
        "resadvnet44": "/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet44/log.txt",
    }
    field = ["Valid Acc."]

    monitor = LoggerMonitor(paths)
    monitor.plot(names=field)
    savefig("test.eps")