def __init_logger(log_file="tmdb.log", level="info", back_count=3):
    """
    Build the service logger.

    The log file is placed in a ``logs`` directory that sits next to this
    source file.

    :param log_file: name of the log file
    :param level: log level name: debug, info, warning, error or crit
    :param back_count: number of days of log backups to keep (forwarded to
        ``Logger`` as ``backCount``)
    :return: the ``Logger`` object
    """
    # Directory that contains this source file.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    log_file_abspath = os.path.join(module_dir, "logs", log_file)
    return Logger(log_file_abspath, level=level, backCount=back_count)
# Methods of class Tmdb (the class header lies outside this chunk).
def __translations(self):
    """
    Fetch the translations of the current person from TMDB.

    :return: raw response body (text) of the ``/translations`` request
    """
    url = "https://api.themoviedb.org/3/person/" + self.tmdb_id + "/translations"
    response = requests.get(url, headers=self.header)
    return response.text

def __get_actor_plot(self):
    """
    Pick the biography of the current person.

    The CN translation is preferred and the US translation is the fallback.
    A translation whose biography is empty is skipped, so an empty CN entry
    no longer hides an available US biography.

    :return: biography text, or "" when neither translation provides one
    """
    payload = json.loads(self.__translations())
    # Index the entries by country code; a later entry with the same code
    # replaces an earlier one.
    by_country = {}
    for translation in payload["translations"]:
        by_country[translation["iso_3166_1"]] = translation
    for country in ("CN", "US"):
        if country not in by_country:
            continue
        biography = by_country[country]["data"]["biography"]
        if biography:
            return biography
    return ""

def create_actor_nfo(self):
    """Prepare the data for the person NFO; so far only the biography is fetched."""
    # The value is not written anywhere yet.
    plot = self.__get_actor_plot()
import datetime
import time

DEFAULT_DATE_FORMAT = '%Y-%m-%d'
FULL_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'


def get_someday_before(n, fmt=DEFAULT_DATE_FORMAT):
    """
    Return the local date ``n`` days before today as a string.

    :param n: number of days to go back (0 is today)
    :param fmt: ``strftime`` format of the result
    :return: formatted date string
    """
    day = datetime.date.today() - datetime.timedelta(days=n)
    return day.strftime(fmt)


def get_today(fmt=DEFAULT_DATE_FORMAT):
    """Return today's date formatted with ``fmt``."""
    return get_someday_before(0, fmt)


def get_yesterday(fmt=DEFAULT_DATE_FORMAT):
    """Return yesterday's date formatted with ``fmt``."""
    return get_someday_before(1, fmt)


def get_someday_before_tmp(n, ms=False):
    """
    Return the timestamp of the date ``n`` days before today.

    The date carries no time of day, so the timestamp refers to the start of
    that day in local time.

    :param n: number of days to go back (0 is today)
    :param ms: when true the result is in milliseconds, otherwise in seconds
    :return: timestamp as a float
    """
    day = datetime.date.today() - datetime.timedelta(days=n)
    seconds = time.mktime(day.timetuple())
    return seconds * 1000 if ms else seconds


def get_today_tmp(ms=False):
    """Return the timestamp of today (see ``get_someday_before_tmp``)."""
    return get_someday_before_tmp(0, ms)


def get_yesterday_tmp(n=None, ms=False):
    """
    Return the timestamp of yesterday (see ``get_someday_before_tmp``).

    :param n: ignored; it used to be a required positional argument and is
        kept, now optional, so that existing calls keep working
    :param ms: when true the result is in milliseconds, otherwise in seconds
    :return: timestamp as a float
    """
    return get_someday_before_tmp(1, ms)
import logging
from logging import handlers
from functools import wraps
import traceback
import os


class Logger(object):
    """
    Logging wrapper that prints every record and also writes it to a file.

    Conventions noted by the original author:
    1. Business events that need an alert or an e-mail go through ``warning``.
    2. Runtime failures go through ``error``.
    3. ``record`` wraps a function and logs when it starts and ends.
    """
    # Maps the level names accepted by __init__ to the logging constants.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """
        Create (or reuse) the logger named after ``filename``.

        :param filename: path of the log file; missing parent directories
            are created
        :param level: one of the keys of ``level_relations``
        :param when: rotation unit of the file handler: S second, M minute,
            H hour, D day, W week, midnight
        :param backCount: number of rotated files to keep; older ones are
            deleted by the handler
        :param fmt: record format used for both console and file output
        """
        log_dir = os.path.dirname(filename)
        # A bare file name has no directory part; os.makedirs('') would fail.
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations.get(level))
        # getLogger returns the same object for the same name. Adding the
        # handlers again would emit every record once per instantiation, so
        # an already configured logger keeps its existing handlers (and with
        # them its original fmt/when/backCount).
        if self.logger.handlers:
            return
        formatter = logging.Formatter(fmt)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        file_handler = handlers.TimedRotatingFileHandler(filename=filename, when=when,
                                                         backupCount=backCount,
                                                         encoding='utf-8')
        file_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)
        self.logger.addHandler(file_handler)

    def record(self, arg):
        """
        Build a decorator that logs the start and the end of a function call.

        If the wrapped function raises an ``Exception``, the exception and
        its traceback are logged as an error, an "interrupted" line is
        logged, and the wrapper returns ``False`` instead of re-raising.

        :param arg: text placed in front of every log line of the decorator
        :return: the decorator
        """
        def _log(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                self.logger.info(arg + '_开始----' + func.__name__)
                try:
                    ret = func(*args, **kwargs)
                except Exception as e:
                    desc = "%s_traceback.format_exc(): %s" % (e, traceback.format_exc())
                    self.logger.error(desc)
                    self.logger.info(arg + '_中断----' + func.__name__)
                    return False
                else:
                    self.logger.info(arg + '_结束----' + func.__name__)
                    return ret

            return wrapper

        return _log
+#!/usr/bin/env python +# -*- coding: utf-8 -*-