#!/usr/bin/env python3
"""Translate text from a file or stdin using Google Translate."""

import json
import re
import sys
from argparse import ArgumentParser
from locale import getpreferredencoding
from urllib.parse import urlencode
from urllib.request import Request, urlopen

# Matches either a complete JSON string literal or a run of two or more
# commas.  Matching the string literals first lets repair_json() leave
# commas that are part of the translated text untouched.
_STRING_OR_COMMA_RUN = re.compile(r'"(?:\\.|[^"\\])*"|,{2,}')


def die(msg, *args):
    """Write ``msg % args`` to stderr and exit with status 1."""
    sys.stderr.write((msg % args) + '\n')
    sys.exit(1)


def repair_json(page):
    """Collapse runs of commas that sit outside JSON string literals.

    The response may contain empty array slots (``[1,,2]``), which is
    not valid JSON.  Each run of consecutive commas is reduced to a
    single comma; string literals are copied through unchanged so text
    that itself contains ``,,`` is not altered.
    """
    def keep_strings(match):
        token = match.group(0)
        return token if token.startswith('"') else ','

    return _STRING_OR_COMMA_RUN.sub(keep_strings, page)


def parse_response(page):
    """Return the translated text contained in the raw response *page*.

    The first element of the decoded array is treated as a list of
    segments; the first item of every segment is concatenated.  Exits
    through die() when the page cannot be parsed or lacks that shape.
    """
    try:
        result = json.loads(repair_json(page))
    except ValueError as e:
        die("Error parsing results: %s", e)
    try:
        return ''.join(c[0] for c in result[0])
    except (IndexError, KeyError, TypeError) as e:
        die("Unexpected response format: %s", e)


def translate(text, dst_lang, src_lang='auto'):
    """Translate *text* into *dst_lang* and return the translated string.

    *src_lang* is the source language code ('auto' by default).  Exits
    through die() on network or parsing errors.
    """
    # NOTE(review): the request goes over plain HTTP, so the text is
    # sent unencrypted -- confirm whether the HTTPS endpoint can be used.
    url = 'http://translate.google.com/translate_a/t'
    # A list of pairs keeps the field order of the request body stable.
    data = urlencode([
        ('client', 't'),
        ('text', text),
        ('hl', 'en'),
        ('sl', src_lang),
        ('tl', dst_lang),
        ('multires', 1),
        ('otf', 2),
        ('pc', 1),
        ('ssel', 0),
        ('tsel', 0),
        ('sc', 1),
    ])
    headers = {'User-agent': 'Mozilla/5.0'}
    req = Request(url=url, data=data.encode('utf-8'), headers=headers)
    try:
        with urlopen(req) as response:
            page = response.read().decode('utf-8')
    except (OSError, UnicodeError) as e:
        # URLError and HTTPError are both subclasses of OSError.
        die("Error fetching translation: %s", e)
    return parse_response(page)


def decode_input(raw, encodings):
    """Decode the bytes *raw* with the first encoding that succeeds.

    *encodings* is tried in order.  Unknown encoding names are skipped
    like decoding failures.  Exits through die() if none of them work.
    """
    err = None
    for enc in encodings:
        try:
            return raw.decode(enc)
        except (UnicodeError, LookupError) as e:
            err = e
    die("Can't decode input (tried these encodings: %s). Last error: %s",
        encodings, err)


def binary_stream(stream):
    """Return the underlying binary buffer of *stream*.

    Text streams such as sys.stdin expose it as ``.buffer``; a stream
    without that attribute is returned unchanged.
    """
    return getattr(stream, 'buffer', stream)


def parse_args(argv=None):
    """Parse command-line options and open the selected streams.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace carries
    ``input`` and ``output`` stream objects, ``input_encoding`` as a
    list of encoding names and ``output_encoding`` as a single name,
    with the special value LOCALE already resolved.
    """
    parser = ArgumentParser(description='Translate text using '
                            'Google Translate.')
    parser.add_argument('--from', '-f', dest='from_', metavar='LANG',
                        default='auto',
                        help='Translate from LANG language '
                        '(e.g. en, de, es, default: auto)')
    parser.add_argument('--to', '-t', metavar='LANG', default='en',
                        help='Translate to LANG language '
                        '(e.g. en, de, es, default: en)')
    parser.add_argument('--input-file', '-i', metavar='FILE',
                        help='Get text to translate from FILE instead '
                        'of stdin')
    parser.add_argument('--output-file', '-o', metavar='FILE',
                        help='Output translated text to FILE instead '
                        'of stdout')
    parser.add_argument('--input-encoding', '-I', metavar='ENC',
                        default='LOCALE,utf-8,iso-8859-15',
                        help='Use ENC character encoding to read the '
                        'input, can be a comma separated list of '
                        'encodings to try, LOCALE being a special value '
                        "for the user's locale-specified preferred "
                        'encoding (default: LOCALE,utf-8,iso-8859-15)')
    parser.add_argument('--output-encoding', '-O', metavar='ENC',
                        default='LOCALE',
                        help='Use ENC character encoding to write the '
                        'output (default: LOCALE)')
    args = parser.parse_args(argv)

    # Files are opened in binary mode: decoding and encoding are done
    # explicitly with the encodings selected on the command line.
    args.input = sys.stdin
    if args.input_file:
        try:
            args.input = open(args.input_file, 'rb')
        except OSError as e:
            die("Can't open file %s for reading: %s", args.input_file, e)
    args.output = sys.stdout
    if args.output_file:
        try:
            args.output = open(args.output_file, 'wb')
        except OSError as e:
            die("Can't open file %s for writing: %s", args.output_file, e)

    # str.split() never returns an empty list, so blank entries have to
    # be dropped before the "nothing given" fallback can trigger.
    names = [e.strip() for e in args.input_encoding.split(',')]
    names = [e for e in names if e] or ['LOCALE']
    args.input_encoding = [getpreferredencoding() if e == 'LOCALE' else e
                           for e in names]
    if args.output_encoding == 'LOCALE':
        args.output_encoding = getpreferredencoding()
    return args


def main():
    """Read the input, translate it and write the encoded result."""
    args = parse_args()
    try:
        raw = binary_stream(args.input).read()
        text = decode_input(raw, args.input_encoding)
        trans_text = translate(text, args.to, args.from_) + '\n'
        try:
            payload = trans_text.encode(args.output_encoding)
        except (UnicodeError, LookupError) as e:
            die("Can't encode output as %s: %s", args.output_encoding, e)
        out = binary_stream(args.output)
        out.write(payload)
        out.flush()
    finally:
        # Only close what parse_args() opened, never the std streams.
        if args.input is not sys.stdin:
            args.input.close()
        if args.output is not sys.stdout:
            args.output.close()


if __name__ == '__main__':
    main()