piapro.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. import urllib.parse
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. clean_html,
  6. get_element_by_class,
  7. parse_duration,
  8. parse_filesize,
  9. str_to_int,
  10. unified_timestamp,
  11. urlencode_postdata,
  12. )
  13. class PiaproIE(InfoExtractor):
  14. _NETRC_MACHINE = 'piapro'
  15. _VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
  16. _TESTS = [{
  17. 'url': 'https://piapro.jp/t/NXYR',
  18. 'md5': 'f7c0f760913fb1d44a1c45a4af793909',
  19. 'info_dict': {
  20. 'id': 'NXYR',
  21. 'ext': 'mp3',
  22. 'uploader': 'wowaka',
  23. 'uploader_id': 'wowaka',
  24. 'title': '裏表ラバーズ',
  25. 'description': 'http://www.nicovideo.jp/watch/sm8082467',
  26. 'duration': 189.0,
  27. 'timestamp': 1251785475,
  28. 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
  29. 'upload_date': '20090901',
  30. 'view_count': int,
  31. },
  32. }, {
  33. 'note': 'There are break lines in description, mandating (?s) flag',
  34. 'url': 'https://piapro.jp/t/9cSd',
  35. 'md5': '952bb6d1e8de95050206408a87790676',
  36. 'info_dict': {
  37. 'id': '9cSd',
  38. 'ext': 'mp3',
  39. 'title': '青に溶けた風船 / 初音ミク',
  40. 'description': 'md5:d395a9bd151447631a5a1460bc7f9132',
  41. 'uploader': 'シアン・キノ',
  42. 'duration': 229.0,
  43. 'timestamp': 1644030039,
  44. 'upload_date': '20220205',
  45. 'view_count': int,
  46. 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
  47. 'uploader_id': 'cyankino',
  48. },
  49. }, {
  50. 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
  51. 'only_matching': True,
  52. }, {
  53. 'url': 'https://piapro.jp/t/-SO-',
  54. 'only_matching': True,
  55. }]
  56. _login_status = False
  57. def _perform_login(self, username, password):
  58. login_ok = True
  59. login_form_strs = {
  60. '_username': username,
  61. '_password': password,
  62. '_remember_me': 'on',
  63. 'login': 'ログイン',
  64. }
  65. self._request_webpage('https://piapro.jp/login/', None)
  66. urlh = self._request_webpage(
  67. 'https://piapro.jp/login/exe', None,
  68. note='Logging in', errnote='Unable to log in',
  69. data=urlencode_postdata(login_form_strs))
  70. if urlh is False:
  71. login_ok = False
  72. else:
  73. parts = urllib.parse.urlparse(urlh.url)
  74. if parts.path != '/':
  75. login_ok = False
  76. if not login_ok:
  77. self.report_warning(
  78. 'unable to log in: bad username or password')
  79. self._login_status = login_ok
  80. def _real_extract(self, url):
  81. video_id = self._match_id(url)
  82. webpage = self._download_webpage(url, video_id)
  83. category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID')
  84. if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
  85. raise ExtractorError('The URL does not contain audio.', expected=True)
  86. def extract_info(name, description):
  87. return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None)
  88. return {
  89. 'id': video_id,
  90. 'title': clean_html(get_element_by_class('contents_title', webpage)),
  91. 'description': clean_html(get_element_by_class('contents_description', webpage)),
  92. 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)),
  93. 'uploader_id': self._search_regex(
  94. r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None),
  95. 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False),
  96. 'duration': parse_duration(extract_info('長さ', 'duration')),
  97. 'view_count': str_to_int(extract_info('閲覧数', 'view count')),
  98. 'thumbnail': self._html_search_meta('twitter:image', webpage),
  99. 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')),
  100. 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'),
  101. 'ext': 'mp3',
  102. 'vcodec': 'none',
  103. }