chap07/kadai2.py

サンプルコードのダウンロード

 1#!/usr/bin/env python
 2# -*- coding: utf-8 -*-
 3
 4import sys
 5import re
 6import chardet
 7from urllib import request
 8
 9
def get_url_contents(url):
    """Fetch *url* and return its body decoded to text.

    The character encoding is guessed from the raw bytes with
    ``chardet.detect``.

    Args:
        url: URL to open; passed straight to ``urllib.request.urlopen``.

    Returns:
        The decoded response body as ``str``. An empty body yields ``''``.

    Raises:
        UnicodeDecodeError: if the body cannot be decoded with the encoding
            chardet guessed.
        Any exception raised by ``urlopen`` / ``read`` propagates unchanged.
    """
    # Use the response as a context manager so the connection is closed
    # even when read() raises.
    with request.urlopen(url) as response:
        stream = response.read()

    # Nothing to detect or decode; chardet reports no encoding for empty
    # input, which would otherwise end up as decode(None) -> TypeError.
    if not stream:
        return ''

    # Guess the character encoding with chardet.detect.
    encoding = chardet.detect(stream).get('encoding')
    if encoding is None:
        # chardet could not make a guess; fall back to UTF-8 and replace
        # undecodable bytes rather than failing with a TypeError.
        return stream.decode('utf-8', errors='replace')
    return stream.decode(encoding)
16
17
def get_html_title(contents):
    """Return the text inside the first ``<title>`` element of *contents*.

    The tag name is matched case-insensitively, attributes on the opening
    tag are allowed, and the title text may span several lines. The text is
    returned exactly as it appears between the tags (whitespace included).

    Args:
        contents: HTML document as a ``str``.

    Returns:
        The title text, or the string ``'Error: title tag was not found'``
        when no title element is present.
    """
    # Pattern for the title tag:
    #   <title\b[^>]*>  opening tag, optionally with attributes
    #   (.*?)           non-greedy so we stop at the FIRST closing tag
    #   </title\s*>     closing tag
    pattern = r'<title\b[^>]*>(.*?)</title\s*>'

    # Search and pull out the tag's contents. DOTALL lets the title span
    # newlines; IGNORECASE accepts <TITLE> as well.
    match = re.search(pattern, contents, re.IGNORECASE | re.DOTALL)
    if match is None:
        return 'Error: title tag was not found'
    return match.group(1)
28
29
if __name__ == '__main__':
    # Exit with a usage message (written to stderr, exit status 1) instead
    # of an IndexError traceback when the URL argument is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} URL'.format(sys.argv[0]))

    # Fetch the page given on the command line and print its title.
    contents = get_url_contents(sys.argv[1])
    print(get_html_title(contents))