add: 新增多线程批处理方式
This commit is contained in:
parent
a3cd4c9465
commit
61506ffaee
26
README.md
26
README.md
|
@ -118,6 +118,32 @@ python3 main.py --dir_path "example/movies","example/tvs" --output data/metadata
|
|||
nohup python3 main.py > nohup.log 2>&1 & echo $! > run.pid
|
||||
```
|
||||
|
||||
### 多线程刮削
|
||||
> 前置要求:需要先执行main.py脚本的"collect"模式收集nfo元数据文件
|
||||
|
||||
#### 1. 直接修改脚本文件方式
|
||||
1. 修改 `multi_thread.py` 文件中 `if __name__ == '__main__':` 代码块内的 `__dir_path`、`__output`、`__tmdb_token`、`__mode` 参数值
|
||||
2. 执行脚本
|
||||
```shell
|
||||
python3 multi_thread.py
|
||||
```
|
||||
|
||||
#### 2. 命令行执行
|
||||
> 注意参数 `--dir_path` 的值如果需要配置多个,请使用英文半角逗号拼接,不要有空格
|
||||
|
||||
```shell
|
||||
python3 multi_thread.py --dir_path "example/movies","example/tvs" --output data/metadata/person --tmdb_token tmdb_token
|
||||
```
|
||||
|
||||
#### 3. 后台执行
|
||||
> 可以结合前两种执行方式使用
|
||||
|
||||
```shell
|
||||
nohup python3 multi_thread.py > nohup.log 2>&1 & echo $! > run.pid
|
||||
```
|
||||
|
||||
|
||||
|
||||
### 补充
|
||||
1. 运行提示 `No module named 'requests'`,但实际 python 环境中已经安装了该模块:
|
||||
* 查看当前执行的python版本:```python --version```
|
||||
|
|
57
main.py
57
main.py
|
@ -5,7 +5,7 @@ import sys
|
|||
|
||||
from utils.collect_metadata import __collect_nfo
|
||||
from utils.redo import __redo, __check
|
||||
from utils.scrape import __execute
|
||||
from utils.scrape import Scrape
|
||||
from utils.LoggerUtil import Logger
|
||||
|
||||
|
||||
|
@ -82,6 +82,12 @@ def __get_sys_args(log):
|
|||
"collect(元数据文件转移)/scrape(元数据刮削)/redo(重新刮削异常元数据)"))
|
||||
raise SystemExit(1)
|
||||
arg_json["__mode"] = mode_value
|
||||
if "--language" not in arg_key.keys():
|
||||
log.logger.warn("未输入脚本执行语言,默认使用中文简体语言格式:{0}".format("--language"))
|
||||
arg_json["__language"] = "zh-CN"
|
||||
else:
|
||||
mode_value = sys.argv[arg_key["--language"] + 1]
|
||||
arg_json["__language"] = mode_value
|
||||
return arg_json
|
||||
|
||||
|
||||
|
@ -92,18 +98,40 @@ def __create_default_dirs():
|
|||
os.makedirs("./redo")
|
||||
|
||||
|
||||
def __master_execute(log, dir_path, output, tmdb_token, mode, language="zh-CN"):
    """Run one processing mode over every configured directory.

    Args:
        log: Logger wrapper handed through to the helpers and scrape workers.
        dir_path: Iterable of directories to process.
        output: Directory that receives the actor metadata.
        tmdb_token: TMDB API token handed to the scrape/redo steps.
        mode: One of "collect", "scrape", "redo" or "check"; any other value
            makes the loop a no-op.
        language: Metadata language passed to the scrape/redo steps
            (defaults to "zh-CN", Simplified Chinese).
    """
    # Check the Python version before doing any work.
    __check_version(log=log)
    __create_default_dirs()

    if "scrape" == mode:
        # Remove the error record once, before any worker thread is started.
        # Deleting it inside the loop would run while workers started for
        # earlier directories are still active (presumably appending to it).
        error_file_path = "./error_tmdb_ids.txt"
        if os.path.exists(error_file_path):
            os.remove(error_file_path)

    workers = []
    for real_dir_path in dir_path:
        if "collect" == mode:
            __collect_nfo(log, real_dir_path, output)
        if "scrape" == mode:
            worker = Scrape(log=log, dir_path=real_dir_path, output=output, tmdb_token=tmdb_token,
                            language=language)
            worker.start()
            workers.append(worker)
        if "redo" == mode:
            __redo(log=log, output=output, tmdb_token=tmdb_token, language=language)
        if "check" == mode:
            __check(scan_path=output)

    # Return only after every scrape worker has finished.
    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 初始化日志
|
||||
__log = __init_logger()
|
||||
sys_args = __get_sys_args(log=__log)
|
||||
# 扫描目录
|
||||
# __dir_path = ["/volume2/video/animation", "/volume2/video/children", "/volume2/video/documentary", "/volume2/video/movies", "/volume2/video/tvs", "/volume2/video/variety"]
|
||||
__dir_path = ["/Users/liuxuewen/workspace/self/gitea/tmdb-person/data/metadata/nfo"]
|
||||
__dir_path = ["/data/tmdb-person/data/metadata/nfo"]
|
||||
# 输出演员元数据目录
|
||||
__output = "/Users/liuxuewen/workspace/self/gitea/tmdb-person/data/metadata/person"
|
||||
__output = "/data/tmdb-person/data/metadata/person"
|
||||
# TMDB API TOKEN
|
||||
__tmdb_token = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIxYTU4ODAxMGY5OTUwYWEyNThhYjFhYjJlMjI4NGVmYSIsInN1YiI6IjYxYmRmOGNjMzgzZGYyMDA0MjIzNDhjOSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.RPG8F8AELlK7MgrXDR2U0YRv61VteZZ9ponilnkQqkE"
|
||||
__mode = "scrape"
|
||||
__language = "zh-CN"
|
||||
if len(sys_args.keys()) > 0:
|
||||
# 扫描目录
|
||||
__dir_path = sys_args["__dir_path"]
|
||||
|
@ -112,21 +140,6 @@ if __name__ == '__main__':
|
|||
# TMDB API TOKEN
|
||||
__tmdb_token = sys_args["__tmdb_token"]
|
||||
__mode = sys_args["__mode"]
|
||||
# 检查python版本
|
||||
__check_version(log=__log)
|
||||
# 开始执行主程序
|
||||
__create_default_dirs()
|
||||
# 默认 language="zh-CN" (简体中文),可以通过修改 "language" 的值变更获取元数据的语言类别
|
||||
for __real_dir_path in __dir_path:
|
||||
if "collect" == __mode:
|
||||
__collect_nfo(__log, __real_dir_path, __output)
|
||||
if "scrape" == __mode:
|
||||
# 删除异常信息存储文件
|
||||
error_file_path = "./error_tmdb_ids.txt"
|
||||
if os.path.exists(error_file_path):
|
||||
os.remove(error_file_path)
|
||||
__execute(log=__log, dir_path=__real_dir_path, output=__output, tmdb_token=__tmdb_token)
|
||||
if "redo" == __mode:
|
||||
__redo(log=__log, output=__output, tmdb_token=__tmdb_token)
|
||||
if "check" == __mode:
|
||||
__check(scan_path=__output)
|
||||
__language = sys_args["__language"]
|
||||
__master_execute(log=__log, dir_path=__dir_path, output=__output, tmdb_token=__tmdb_token, mode=__mode,
|
||||
language=__language)
|
||||
|
|
61
multi_thread.py
Normal file
61
multi_thread.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# 多线程模式执行脚本
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from main import __init_logger, __get_sys_args, __check_version, __create_default_dirs
|
||||
from utils.scrape import Scrape
|
||||
|
||||
|
||||
def __cut_dirs(log, dir_path, output):
    """Copy nfo files into bucket directories keyed by their first character.

    Every regular file found directly inside each directory of ``dir_path``
    is copied to ``<parent of output>/nfo_list/<c>/``, where ``<c>`` is the
    lower-cased first character of the file name. Source files are left in
    place.

    Args:
        log: Object exposing ``logger.info``.
        dir_path: Iterable of directories holding the nfo files to group.
        output: Actor metadata output directory; ``nfo_list`` is created
            next to it.

    Returns:
        Path of the ``nfo_list`` directory.
    """
    log.logger.info("开始执行元数据文件分组:{0}".format(dir_path))
    nfo_list = os.path.join(os.path.dirname(output), "nfo_list")
    os.makedirs(nfo_list, exist_ok=True)
    for nfo_file_dir in dir_path:
        for nfo_file in os.listdir(nfo_file_dir):
            source_path = os.path.join(nfo_file_dir, nfo_file)
            # shutil.copyfile cannot copy a directory, so only regular files
            # are grouped; anything else is skipped.
            if not os.path.isfile(source_path):
                continue
            bucket_dir = os.path.join(nfo_list, nfo_file[0].lower())
            os.makedirs(bucket_dir, exist_ok=True)
            shutil.copyfile(source_path, os.path.join(bucket_dir, nfo_file))
    log.logger.info("结束执行元数据文件分组:{0}".format(nfo_list))
    return nfo_list
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Initialise logging and read any command-line overrides.
    __log = __init_logger()
    sys_args = __get_sys_args(log=__log)
    # Directories to scan for nfo files (default when no arguments are given).
    __dir_path = ["data/metadata/nfo"]
    # Output directory for actor metadata.
    __output = "data/metadata/person"
    # TMDB API token.
    # NOTE(review): a real-looking credential is committed here; consider
    # rotating it and loading the token from outside the repository.
    __tmdb_token = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIxYTU4ODAxMGY5OTUwYWEyNThhYjFhYjJlMjI4NGVmYSIsInN1YiI6IjYxYmRmOGNjMzgzZGYyMDA0MjIzNDhjOSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.RPG8F8AELlK7MgrXDR2U0YRv61VteZZ9ponilnkQqkE"
    # NOTE(review): __mode is assigned but nothing below reads it; this script
    # always runs the grouping + scrape steps.
    __mode = "scrape"
    __language = "zh-CN"
    if len(sys_args.keys()) > 0:
        # Command-line values replace the defaults above.
        __dir_path = sys_args["__dir_path"]
        __output = sys_args["__output"]
        __tmdb_token = sys_args["__tmdb_token"]
        __language = sys_args["__language"]
    # Check the Python version, then prepare the working directories.
    __check_version(log=__log)
    __create_default_dirs()
    __nfo_list = __cut_dirs(log=__log, dir_path=__dir_path, output=__output)
    # Remove the error record left by a previous run before workers start.
    error_file_path = "./error_tmdb_ids.txt"
    if os.path.exists(error_file_path):
        os.remove(error_file_path)
    # Start one scrape worker per bucket directory.
    for dir_name in os.listdir(__nfo_list):
        __bucket_dir = os.path.join(__nfo_list, dir_name)
        # Each worker is given a directory to scan; a stray regular file in
        # nfo_list (e.g. an OS metadata file) must not get a worker.
        if not os.path.isdir(__bucket_dir):
            continue
        scrape = Scrape(log=__log, dir_path=__bucket_dir, output=__output,
                        tmdb_token=__tmdb_token, language=__language)
        scrape.start()
|
105
utils/scrape.py
105
utils/scrape.py
|
@ -2,53 +2,68 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
import uuid
|
||||
|
||||
from utils.analyze import Analyze
|
||||
from utils.tmdb import Tmdb
|
||||
|
||||
thread_lock = threading.Lock()
|
||||
|
||||
def __execute(log, dir_path, output, tmdb_token, language="zh-CN"):
    """Scrape actor metadata and posters for the nfo files under ``dir_path``.

    Files whose name contains ".nfo" are collected from ``dir_path`` itself
    and from its immediate sub-directories. For every actor entry carrying a
    "tmdbid", a folder ``output/<initial>/<name>-tmdb-<id>`` is ensured and
    ``person.nfo`` / ``folder.jpg`` are fetched through ``Tmdb`` unless they
    are already present. Each processed nfo file is then moved into the
    relative ``complete`` directory.

    Args:
        log: Object exposing ``logger.info``.
        dir_path: Root directory to scan.
        output: Root directory for the per-actor folders.
        tmdb_token: TMDB API token passed to ``Tmdb``.
        language: Metadata language passed to ``Tmdb`` (default "zh-CN").
    """
    log.logger.info("------------------- 开始获取演员元数据及海报 -------------------")
    log.logger.info("当前执行元数据刮削识别的根文件夹:{0}".format(dir_path))

    # Gather nfo files from the root and from one level of sub-directories.
    nfo_paths = []
    for entry in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry)
        if os.path.isdir(entry_path):
            nfo_paths.extend(
                os.path.join(entry_path, child)
                for child in os.listdir(entry_path)
                if ".nfo" in child
            )
        elif os.path.isfile(entry_path) and ".nfo" in entry:
            nfo_paths.append(entry_path)

    for nfo_path in nfo_paths:
        log.logger.info("开始处理元数据刮削识别:{0}".format(nfo_path))
        nfo_data = Analyze(file_path=nfo_path).analyze()
        for actor in nfo_data["actors"]:
            log.logger.info("当前解析的演员信息: {0}".format(actor))
            if "tmdbid" not in actor:
                continue
            tmdb_id = actor["tmdbid"]
            actor_name = actor["name"]
            # Actor folders are bucketed by the lower-cased first character.
            actor_dir = os.path.join(output, actor_name[0].lower(), actor_name + "-tmdb-" + tmdb_id)
            os.makedirs(actor_dir, exist_ok=True)
            # Skip the metadata download when person.nfo already exists.
            if "person.nfo" not in os.listdir(actor_dir):
                Tmdb(log=log, tmdb_id=tmdb_id, actor_path=actor_dir, tmdb_token=tmdb_token,
                     language=language).create_actor_nfo()
            else:
                log.logger.info("当前路径已存在person.nfo文件, 跳过刮削:{0}".format(actor_dir))
            # Skip the poster download when folder.jpg already exists.
            if "folder.jpg" not in os.listdir(actor_dir):
                Tmdb(log=log, tmdb_id=tmdb_id, actor_path=actor_dir, tmdb_token=tmdb_token,
                     language=language).get_actor_image()
            else:
                log.logger.info("当前路径已存在folder.jpg文件, 跳过刮削:{0}".format(actor_dir))
        # Archive the processed nfo file. Moving to an explicit file path
        # (rather than to the directory) avoids shutil.Error when a file of
        # the same name is already in complete/, e.g. after a re-run.
        os.makedirs("complete", exist_ok=True)
        shutil.move(nfo_path, os.path.join("complete", os.path.basename(nfo_path)))
    log.logger.info("------------------- 结束获取演员元数据及海报 -------------------")
|
||||
class Scrape(threading.Thread):
    """Worker thread that scrapes actor metadata and posters for one directory."""

    def __init__(self, log, dir_path, output, tmdb_token, language="zh-CN", thread_id=None):
        """Store the scrape parameters for ``run``.

        Args:
            log: Object exposing ``logger.info``.
            dir_path: Directory scanned for nfo files.
            output: Root directory for the per-actor folders.
            tmdb_token: TMDB API token passed to ``Tmdb``.
            language: Metadata language passed to ``Tmdb`` (default "zh-CN").
            thread_id: Identifier shown in the start/end log lines. A fresh
                UUID is generated for each instance when omitted.
        """
        threading.Thread.__init__(self)
        # A ``uuid.uuid4()`` default in the signature would be evaluated once
        # at definition time and shared by every worker, so it is created here.
        self.thread_id = uuid.uuid4() if thread_id is None else thread_id
        self.log = log
        self.dir_path = dir_path
        self.output = output
        self.tmdb_token = tmdb_token
        self.language = language

    def _list_nfo_files(self):
        """Return nfo paths found in ``dir_path`` and its immediate sub-directories."""
        nfo_paths = []
        for entry in os.listdir(self.dir_path):
            entry_path = os.path.join(self.dir_path, entry)
            if os.path.isdir(entry_path):
                nfo_paths.extend(
                    os.path.join(entry_path, child)
                    for child in os.listdir(entry_path)
                    if ".nfo" in child
                )
            elif os.path.isfile(entry_path) and ".nfo" in entry:
                nfo_paths.append(entry_path)
        return nfo_paths

    def run(self):
        """Scrape every nfo file under ``dir_path``, then archive it.

        For each actor entry carrying a "tmdbid", the folder
        ``output/<initial>/<name>-tmdb-<id>`` is ensured and ``person.nfo`` /
        ``folder.jpg`` are fetched through ``Tmdb`` unless already present.
        Each processed nfo file is moved into the relative ``complete``
        directory.
        """
        info = self.log.logger.info
        info("------------------- 开始获取演员元数据及海报:{0} -------------------".format(self.thread_id))
        info("当前执行元数据刮削识别的根文件夹:{0}".format(self.dir_path))
        for nfo_path in self._list_nfo_files():
            info("开始处理元数据刮削识别:{0}".format(nfo_path))
            nfo_data = Analyze(file_path=nfo_path).analyze()
            for actor in nfo_data["actors"]:
                info("当前解析的演员信息: {0}".format(actor))
                if "tmdbid" not in actor:
                    continue
                tmdb_id = actor["tmdbid"]
                actor_name = actor["name"]
                # Actor folders are bucketed by the lower-cased first character.
                actor_dir = os.path.join(self.output, actor_name[0].lower(),
                                         actor_name + "-tmdb-" + tmdb_id)
                # ``with`` releases the shared lock even if makedirs raises;
                # a bare acquire()/release() pair would leave it held and
                # block every other worker.
                with thread_lock:
                    os.makedirs(actor_dir, exist_ok=True)
                # Skip the metadata download when person.nfo already exists.
                if "person.nfo" not in os.listdir(actor_dir):
                    Tmdb(log=self.log, tmdb_id=tmdb_id, actor_path=actor_dir, tmdb_token=self.tmdb_token,
                         language=self.language).create_actor_nfo()
                else:
                    info("当前路径已存在person.nfo文件, 跳过刮削:{0}".format(actor_dir))
                # Skip the poster download when folder.jpg already exists.
                if "folder.jpg" not in os.listdir(actor_dir):
                    Tmdb(log=self.log, tmdb_id=tmdb_id, actor_path=actor_dir, tmdb_token=self.tmdb_token,
                         language=self.language).get_actor_image()
                else:
                    info("当前路径已存在folder.jpg文件, 跳过刮削:{0}".format(actor_dir))
            # Archive the processed nfo file. Moving to an explicit file path
            # (rather than to the directory) avoids shutil.Error when a file
            # of the same name is already in complete/, e.g. after a re-run.
            os.makedirs("complete", exist_ok=True)
            shutil.move(nfo_path, os.path.join("complete", os.path.basename(nfo_path)))
        info("------------------- 结束获取演员元数据及海报:{0} -------------------".format(self.thread_id))
|
||||
|
|
Loading…
Reference in New Issue
Block a user