1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4import sys
5import re
6import chardet
7from urllib import request
8
9
def get_url_contents(url):
    """Fetch *url* and return its body decoded to ``str``.

    The encoding is guessed from the raw bytes with ``chardet``. If the
    guess is inconclusive, the charset declared in the response headers is
    used, and UTF-8 as a last resort.

    Args:
        url: URL string (or ``Request`` object) accepted by
            ``urllib.request.urlopen``.

    Returns:
        The response body as text. Bytes that are invalid in the chosen
        encoding are replaced with U+FFFD instead of raising.

    Raises:
        Whatever ``urlopen`` raises (e.g. ``urllib.error.URLError``), and
        ``LookupError`` if the chosen encoding name is unknown to Python.
    """
    # The context manager closes the connection even if read() raises.
    with request.urlopen(url) as response:
        stream = response.read()
        declared = response.headers.get_content_charset()

    # chardet.detect guesses the character encoding from the raw bytes; its
    # 'encoding' entry is None when it cannot decide (e.g. an empty body),
    # so fall back instead of passing None to bytes.decode().
    encoding = chardet.detect(stream).get('encoding') or declared or 'utf-8'

    # The guess is heuristic, so tolerate stray undecodable bytes rather
    # than crashing on an otherwise readable page.
    return stream.decode(encoding, errors='replace')
16
17
def get_html_title(contents):
    """Return the text inside the first ``<title>`` element of *contents*.

    The tag name is matched case-insensitively, the tag may carry
    attributes, and the title text may span several lines. The text is
    returned exactly as it appears in the markup (no whitespace stripping,
    no entity decoding).

    Args:
        contents: HTML document as a ``str``.

    Returns:
        The title text, or the string
        ``'Error: title tag was not found'`` when no title element exists.
    """
    # Pattern for the title tag:
    #   <title\b[^>]*>  opening tag, optionally with attributes
    #   (.*?)           non-greedy, so we stop at the FIRST closing tag
    #   </title\s*>     closing tag
    pattern = r'<title\b[^>]*>(.*?)</title\s*>'

    # Search and pull out the tag's contents. DOTALL lets '.' cross
    # newlines, since titles are often wrapped over several lines.
    match = re.search(pattern, contents, re.IGNORECASE | re.DOTALL)
    if match is None:
        return 'Error: title tag was not found'
    return match.group(1)
28
29
# Script entry point: print the <title> of the page at the URL given as the
# first command-line argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1, which is clearer than an IndexError traceback.
        sys.exit('Usage: {} URL'.format(sys.argv[0]))

    contents = get_url_contents(sys.argv[1])
    print(get_html_title(contents))