tmdb-person/person.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import os
import xml.etree.ElementTree as ET
import json
from utils.LoggerUtil import Logger


def __init_logger(log_file="tmdb.log", level="info", back_count=3):
    """
    Build the logger object used by this script.

    The log file is placed in a ``logs`` directory located next to this
    source file.

    :param log_file: name of the log file inside the ``logs`` directory
    :param level: logging level name: debug, info, warning, error or crit
    :param back_count: number of days of log backups to keep
        (forwarded to ``Logger`` as ``backCount``)
    :return: the ``Logger`` instance
    """
    # Absolute path of this source file.
    this_file = os.path.abspath(__file__)
    # Directory that contains this file; appending "." and normalising
    # again leaves the directory itself.
    script_dir = os.path.abspath(
        os.path.sep.join([os.path.dirname(this_file), "."])
    )
    # Full path of the log file, e.g. <script_dir>/logs/tmdb.log
    log_path = os.path.join(script_dir, "logs", log_file)
    return Logger(log_path, backCount=back_count, level=level)


class Analyze:
    """Parse a movie / TV-show ``.nfo`` XML file into a plain dictionary."""

    def __init__(self, file_path):
        # Path of the .nfo file that analyze() will parse.
        self.file_path = file_path

    def analyze(self):
        """
        Parse ``self.file_path`` and flatten its root element.

        :return: the dictionary filled by ``__list_nodes`` when the root tag
            contains "tvshow" or "movie"; ``None`` for any other root tag
        """
        root = ET.parse(self.file_path).getroot()
        if not any(kind in root.tag for kind in ("tvshow", "movie")):
            return None
        result = {}
        self.__list_nodes(root, result)
        return result

    def __list_nodes(self, root, data):
        """
        Copy the direct children of ``root`` into ``data``.

        Children without sub-elements are stored as ``tag -> text`` (a
        repeated tag overwrites the earlier value). ``actor`` children are
        flattened recursively and gathered under ``data["actors"]``; other
        children that have sub-elements are ignored. Because of the
        recursion every actor dictionary also receives an (empty)
        ``"actors"`` list.

        :param root: XML element whose children are read
        :param data: dictionary that is filled in place
        """
        collected = []
        for child in root:
            if len(child) == 0:
                # Leaf element: keep its text as-is (may be None).
                data[child.tag] = child.text
                continue
            if child.tag == "actor":
                entry = {}
                self.__list_nodes(child, entry)
                collected.append(entry)
        data["actors"] = collected


class Tmdb:
    """
    Small client for the TMDB "person" endpoints used to scrape actor data.

    Logging goes through the module-level ``log`` object, which this file
    creates in its ``__main__`` section.
    """

    def __init__(self, tmdb_id, actor_path, tmdb_token, timeout=10):
        """
        :param tmdb_id: TMDB person id; inserted into the request URLs
        :param actor_path: existing directory the actor's files are written to
        :param tmdb_token: TMDB API token, sent as a ``Bearer`` token
        :param timeout: seconds to wait on each HTTP request before
            ``requests`` raises a timeout error instead of blocking forever
        """
        # Not read anywhere in this class; kept so existing attribute
        # access keeps working.
        self.image_path = None
        self.tmdb_id = tmdb_id
        self.actor_path = actor_path
        self.timeout = timeout
        self.header = {
            "accept": "application/json",
            "Authorization": "Bearer " + tmdb_token
        }

    @staticmethod
    def _image_file_name(profile_path):
        """
        Build the local poster file name for a TMDB ``profile_path``.

        Only the final extension is used, so extra dots in the path do not
        produce a wrong suffix. When the path has no extension the name
        falls back to ``folder.jpg``.

        :param profile_path: value of ``profile_path`` from the person payload
        :return: file name such as ``folder.jpg``
        """
        extension = os.path.splitext(profile_path)[1]
        return "folder" + (extension or ".jpg")

    @staticmethod
    def _select_biography(translations):
        """
        Pick the biography to use from a list of TMDB translation entries.

        The CN entry is preferred; when it is missing or its biography is
        empty the US entry is used instead.

        :param translations: list of translation dictionaries (may be empty
            or ``None``)
        :return: the chosen biography, or ``""`` when none is available
        """
        by_country = {}
        for item in translations or []:
            country = item.get("iso_3166_1")
            if country:
                # A later entry for the same country replaces an earlier one.
                by_country[country] = item
        for country in ("CN", "US"):
            details = by_country.get(country, {}).get("data") or {}
            biography = details.get("biography")
            if biography:
                return biography
        return ""

    def get_actor_info(self):
        """
        Fetch the person details (zh-CN) from TMDB and log the raw payload.

        :return: response body as text, whatever the HTTP status was
        """
        url = "https://api.themoviedb.org/3/person/{0}?language=zh-CN".format(self.tmdb_id)
        response = requests.get(url, headers=self.header, timeout=self.timeout)
        log.logger.info("当前刮削到的演员元数据:{0}".format(response.text))
        return response.text

    def get_actor_image(self):
        """
        Download the actor's profile image into ``self.actor_path``.

        Nothing is written when the person payload has no ``profile_path``
        (including TMDB error payloads) or when the image request does not
        return HTTP 200.
        """
        info = json.loads(self.get_actor_info())
        # .get(): error payloads carry no "profile_path" key at all.
        image_path = info.get("profile_path")
        if not image_path:
            return
        url = 'https://www.themoviedb.org/t/p/original' + image_path
        response = requests.get(url, timeout=self.timeout)
        if response.status_code != 200:
            return
        target = os.path.join(self.actor_path, self._image_file_name(image_path))
        with open(target, 'wb') as f:
            f.write(response.content)

    def __translations(self):
        """
        Fetch all translations of the person from TMDB.

        :return: response body as text
        """
        url = "https://api.themoviedb.org/3/person/{0}/translations".format(self.tmdb_id)
        response = requests.get(url, headers=self.header, timeout=self.timeout)
        return response.text

    def __get_actor_plot(self):
        """
        Return the actor biography, preferring the CN translation over US.

        :return: biography text, or ``""`` when neither translation has one
        """
        payload = json.loads(self.__translations())
        return self._select_biography(payload.get("translations", []))

    def create_actor_nfo(self):
        """
        Placeholder for writing the actor's NFO file.

        Currently only fetches the biography; nothing is written yet.
        """
        # TODO: write person.nfo using the fetched biography.
        plot = self.__get_actor_plot()


def _find_nfo_files(dir_path):
    """Return the .nfo files found directly in dir_path and one level below it."""
    found = []
    for entry in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry)
        if os.path.isdir(entry_path):
            candidates = [os.path.join(entry_path, name) for name in os.listdir(entry_path)]
        else:
            candidates = [entry_path]
        # Require a real file whose name ends with ".nfo", so names such as
        # "movie.nfo.bak" or a directory called "x.nfo" are not picked up.
        found.extend(
            path for path in candidates
            if os.path.isfile(path) and path.lower().endswith(".nfo")
        )
    return found


def __execute(dir_path, output, tmdb_token):
    """
    Scan ``dir_path`` for .nfo files and scrape every actor they mention.

    For each actor a directory ``<output>/<first letter>/<name>-tmdb-<id>``
    is created; the actor details are requested when ``person.nfo`` is
    missing and the profile image is downloaded when no ``folder.*`` file
    exists yet. Files that cannot be parsed, files with an unsupported root
    element and actors without a name or TMDB id are skipped.

    Uses the module-level ``log``, ``Analyze`` and ``Tmdb`` names.

    :param dir_path: root directory to scan (itself plus one level of
        sub-directories)
    :param output: root directory for the actor metadata
    :param tmdb_token: TMDB API token passed on to ``Tmdb``
    """
    log.logger.info("当前执行元数据刮削识别的根文件夹:{0}".format(dir_path))
    for nfo_path in _find_nfo_files(dir_path):
        log.logger.info("开始处理元数据刮削识别:{0}".format(nfo_path))
        try:
            nfo_data = Analyze(file_path=nfo_path).analyze()
        except ET.ParseError:
            # One malformed file must not abort the whole scan.
            log.logger.info("NFO文件解析失败,已跳过:{0}".format(nfo_path))
            continue
        if not nfo_data:
            # analyze() returns None for roots other than tvshow / movie.
            log.logger.info("不支持的NFO类型,已跳过:{0}".format(nfo_path))
            continue
        for actor in nfo_data.get("actors", []):
            tmdb_id = (actor.get("tmdbid") or "").strip()
            actor_name = (actor.get("name") or "").strip()
            if not tmdb_id or not actor_name:
                # Without both values neither the directory name nor the
                # TMDB request can be built.
                continue
            # Bucket directory is the FIRST character of the name; the
            # previous index [1] used the second character and raised
            # IndexError for one-character names.
            bucket = actor_name[0].lower()
            actor_dir = os.path.join(output, bucket, actor_name + "-tmdb-" + tmdb_id)
            os.makedirs(actor_dir, exist_ok=True)
            existing = os.listdir(actor_dir)
            client = Tmdb(tmdb_id=tmdb_id, actor_path=actor_dir, tmdb_token=tmdb_token)
            # Skip the metadata request when the NFO already exists.
            if "person.nfo" not in existing:
                client.get_actor_info()
            # Skip the download when a poster exists; the image is saved as
            # "folder.<suffix>", so any suffix counts, not only ".jpg".
            if not any(os.path.splitext(name)[0] == "folder" for name in existing):
                client.get_actor_image()


if __name__ == '__main__':
    # Directory to scan for .nfo files
    __dir_path = "example/movies"
    # Output directory for the actor metadata
    __output = "data/metadata/person"
    # TMDB API token. The TMDB_TOKEN environment variable takes precedence.
    # NOTE(security): the literal below is a credential committed to source
    # control; it is kept only so the script still runs without
    # configuration. Rotate it and remove the fallback when possible.
    __tmdb_token = os.environ.get(
        "TMDB_TOKEN",
        "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIxYTU4ODAxMGY5OTUwYWEyNThhYjFhYjJlMjI4NGVmYSIsInN1YiI6IjYxYmRmOGNjMzgzZGYyMDA0MjIzNDhjOSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.RPG8F8AELlK7MgrXDR2U0YRv61VteZZ9ponilnkQqkE",
    )
    # Initialise logging; `log` must stay a module-level name because the
    # functions and classes in this file read it as a global.
    log = __init_logger()
    # Run the main program
    __execute(dir_path=__dir_path, output=__output, tmdb_token=__tmdb_token)
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`import requests`
			`import os`
			`import xml.etree.ElementTree as ET`
			`import json`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`from utils.LoggerUtil import Logger`


			`def __init_logger(log_file="tmdb.log", level="info", back_count=3):`
			`"""`
			`服务日志记录对象`
			`:param log_file: 日志文件名`
			`:param level: 日志记录级别。debug info warning error crit`
			`:param back_count: 日志文件备份天数`
			`:return: 日志对象`
			`"""`
			`# 获取当前文件路径`
			`current_path = os.path.abspath(__file__)`
			`# 获取当前文件的父目录`
			`father_path = os.path.abspath(os.path.dirname(current_path) + os.path.sep + ".")`
			`# (必填)日志文件名 log_file="/data/ws_env/logtest/process.log"`
			`log_file_abspath = os.path.join(father_path, "logs", log_file)`
			`return Logger(log_file_abspath, level=level, backCount=back_count)`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00

			`class Analyze:`
			`def __init__(self, file_path):`
			`self.file_path = file_path`

			`def analyze(self):`
			`tree = ET.parse(self.file_path)`
			`root = tree.getroot()`
			`if "tvshow" in root.tag:`
			`data_json = {}`
			`self.__list_nodes(root, data_json)`
			`return data_json`
			`if "movie" in root.tag:`
			`data_json = {}`
			`self.__list_nodes(root, data_json)`
			`return data_json`

			`def __list_nodes(self, root, data):`
			`actors = []`
			`for node in root:`
			`if 0 == len(list(node)):`
			`data[node.tag] = node.text`
			`elif "actor" == node.tag:`
			`actor_json = {}`
			`self.__list_nodes(node, actor_json)`
			`actors.append(actor_json)`
			`data["actors"] = actors`


			`class Tmdb:`
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`def __init__(self, tmdb_id, actor_path, tmdb_token):`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00			`self.image_path = None`
			`self.tmdb_id = tmdb_id`
			`self.actor_path = actor_path`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`self.header = {`
			`"accept": "application/json",`
			`"Authorization": "Bearer " + tmdb_token`
			`}`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00
			`def get_actor_info(self):`
			`url = "https://api.themoviedb.org/3/person/" + self.tmdb_id + "?language=zh-CN"`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`headers = self.header`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00			`response = requests.get(url, headers=headers)`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`log.logger.info("当前刮削到的演员元数据:{0}".format(response.text))`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00			`return response.text`

			`def get_actor_image(self):`
			`image_path = json.loads(self.get_actor_info())["profile_path"]`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 10:08:48 +08:00			`if None is not image_path:`
			`url = 'https://www.themoviedb.org/t/p/original' + image_path`
			`response = requests.get(url)`
			`if response.status_code == 200:`
			`suffix = image_path.split(".")[1]`
			`with open(os.path.join(self.actor_path, "folder." + suffix), 'wb') as f:`
			`f.write(response.content)`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`def __translations(self):`
			`url = "https://api.themoviedb.org/3/person/" + self.tmdb_id + "/translations"`
			`headers = self.header`
			`response = requests.get(url, headers=headers)`
			`return response.text`

			`def __get_actor_plot(self):`
			`translations = self.__translations()`
			`translations_list = json.loads(translations)["translations"]`
			`translations_json = {}`
			`for translation in translations_list:`
			`translations_json[translation["iso_3166_1"]] = translation`
			`plot = ""`
			`if "CN" in translations_json.keys():`
			`zh = translations_json["CN"]`
			`plot = zh["data"]["biography"]`
			`elif "US" in translations_json.keys():`
			`us = translations_json["US"]`
			`plot = us["data"]["biography"]`
			`return plot`

			`def create_actor_nfo(self):`
			`plot = self.__get_actor_plot()`

add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-05 17:19:22 +08:00
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`def __execute(dir_path, output, tmdb_token):`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 09:48:24 +08:00			`__file_paths = []`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`log.logger.info("当前执行元数据刮削识别的根文件夹:{0}".format(dir_path))`
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`for folder in os.listdir(dir_path):`
			`__folder2 = os.path.join(dir_path, folder)`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 09:48:24 +08:00			`# 判断是否文件夹`
			`if os.path.isdir(__folder2):`
			`for nfo_file in os.listdir(__folder2):`
			`__child_file_path = os.path.join(__folder2, nfo_file)`
			`if ".nfo" in os.path.basename(__child_file_path):`
			`__file_paths.append(__child_file_path)`
			`elif os.path.isfile(__folder2):`
			`__file_name = os.path.basename(__folder2)`
			`if ".nfo" in __file_name:`
			`__file_paths.append(__folder2)`
			`for __file_path in __file_paths:`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`log.logger.info("开始处理元数据刮削识别:{0}".format(__file_path))`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 09:48:24 +08:00			`# __file_path = "example/神出鬼没 (2023) - 2160p.nfo"`
			`__nfo_data = Analyze(file_path=__file_path).analyze()`
			`for __actor in __nfo_data["actors"]:`
			`__tmdbid = __actor["tmdbid"]`
			`__actor_name = __actor["name"]`
			`__name = __actor_name[1].lower()`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 10:08:48 +08:00			`__full_actor_name = __actor_name + "-tmdb-" + __tmdbid`
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`__path_dir = os.path.join(output, __name, __full_actor_name)`
add: 刮削演员信息并生成相应目录结构和演员图片 2023-09-06 09:48:24 +08:00			`if not os.path.exists(__path_dir):`
			`os.makedirs(__path_dir)`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`# 如果存在元数据则不再进行刮削`
			`if "person.nfo" not in os.listdir(__path_dir):`
			`Tmdb(tmdb_id=__tmdbid, actor_path=__path_dir, tmdb_token=tmdb_token).get_actor_info()`
			`# 如果存在海报则不再进行刮削`
			`if "folder.jpg" not in os.listdir(__path_dir):`
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`Tmdb(tmdb_id=__tmdbid, actor_path=__path_dir, tmdb_token=tmdb_token).get_actor_image()`


			`if __name__ == '__main__':`
			`# 扫描目录`
			`__dir_path = "example/movies"`
			`# 输出演员元数据目录`
			`__output = "data/metadata/person"`
			`# TMDB API TOKEN`
			`__tmdb_token = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIxYTU4ODAxMGY5OTUwYWEyNThhYjFhYjJlMjI4NGVmYSIsInN1YiI6IjYxYmRmOGNjMzgzZGYyMDA0MjIzNDhjOSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.RPG8F8AELlK7MgrXDR2U0YRv61VteZZ9ponilnkQqkE"`
add: 新增日志类和时间类 2023-09-06 14:20:00 +08:00			`# 初始化日志`
			`log = __init_logger()`
add: 补充README.md说明 change: 优化person.py代码 2023-09-06 10:59:34 +08:00			`# 开始执行主程序`
			`__execute(dir_path=__dir_path, output=__output, tmdb_token=__tmdb_token)`