#!/usr/local/bin/python
# encoding: utf-8
"""
Documentation for polyglot can be found here: http://pypolyglot.readthedocs.org/en/stable
Translate documents and webpages to various markup languages and document formats (html, epub, mobi ..)
Usage:
polyglot init
polyglot [-oc] (pdf|html|epub|mobi) <url> [<destinationFolder> -f <filename> -s <pathToSettingsFile>]
polyglot kindle <url> [-f <filename> -s <pathToSettingsFile>]
polyglot [-o] (epub|mobi) <docx> [<destinationFolder> -f <filename> -s <pathToSettingsFile>]
polyglot kindle <docx> [-f <filename> -s <pathToSettingsFile>]
polyglot [-o] kindleNB2MD <notebook> [<destinationFolder> -s <pathToSettingsFile>]
Options:
init setup the polyglot settings file for the first time
pdf print webpage to pdf
html parse and download webpage to a local HTML document
epub generate an epub format book from a webpage URL
kindle send webpage article straight to kindle
-h, --help show this help message
-v, --version show version
-o, --open open the document after creation
-c, --clean add polyglot's clean styling to the output document
<url> the url of the article's webpage
<docx> path to a DOCX file
-s <pathToSettingsFile>, --settings <pathToSettingsFile> path to alternative settings file (optional)
<destinationFolder> the folder to save the parsed PDF or HTML document to (optional)
-f <filename>, --filename <filename> the name of the file to save, otherwise use webpage title as filename (optional)
"""
################# GLOBAL IMPORTS ####################
import sys
import os
os.environ['TERM'] = 'vt100'
import readline
import glob
import pickle
from subprocess import Popen, PIPE, STDOUT
from docopt import docopt
from fundamentals import tools, times
from polyglot import printpdf
from polyglot import htmlCleaner
# from ..__init__ import *
def main(arguments=None):
    """
    *The main function used when ``cl_utils.py`` is run as a single script from the cl, or when installed as a cl command*

    The module docstring is handed to ``fundamentals.tools`` as the docopt usage
    text; the parsed arguments then select which polyglot converter to run
    (``pdf``, ``html``, ``epub``, ``mobi``, ``kindle`` or ``kindleNB2MD``).

    **Key Arguments:**
        - ``arguments`` -- passed straight through to ``fundamentals.tools``. Default *None* (presumably meaning "parse ``sys.argv``" -- confirm against fundamentals).

    **Return:**
        - None
    """
    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName="polyglot"
    )
    arguments, settings, log, dbConn = su.setup()

    # Log every parsed argument under the variable-style name used in the
    # debug output (`--open` -> `openFlag`, `<url>` -> `url`).
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        log.debug('%s = %s' % (varname, val,))

    # a connection passed in via the arguments overrides the one from setup()
    if "--dbConn" in arguments:
        dbConn = arguments["--dbConn"]

    # Read the values directly from the arguments dictionary rather than
    # `exec`-ing them into local variables: building source code from
    # command-line values breaks (or worse, executes) whenever a URL, filename
    # or path contains a quote or backslash, and `exec` cannot create function
    # locals on Python 3.
    url = arguments.get("<url>")
    docx = arguments.get("<docx>")
    notebook = arguments.get("<notebook>")
    destinationFolder = arguments.get("<destinationFolder>") or os.getcwd()
    # False tells the converters to fall back to the webpage title
    filenameFlag = arguments.get("--filename") or False
    cleanFlag = arguments.get("--clean")
    readability = bool(cleanFlag)
    # path of the generated document (if any) -- used by the `--open` flag
    filepath = None

    ## START LOGGING ##
    startTime = times.get_now_sql_datetime()
    log.info(
        '--- STARTING TO RUN THE cl_utils.py AT %s' %
        (startTime,))

    if arguments.get("init"):
        # open the settings file so the user can edit it
        home = os.path.expanduser("~")
        filepath = home + "/.config/polyglot/polyglot.yaml"
        _open_with_default_app(filepath, log)

    if arguments.get("pdf") and url:
        filepath = printpdf.printpdf(
            log=log,
            settings=settings,
            url=url,
            folderpath=destinationFolder,
            title=filenameFlag,
            append=False,
            readability=readability
        ).get()

    if arguments.get("html") and url:
        cleaner = htmlCleaner.htmlCleaner(
            log=log,
            settings=settings,
            url=url,
            outputDirectory=destinationFolder,
            title=filenameFlag,  # SET TO FALSE TO USE WEBPAGE TITLE,
            style=cleanFlag,  # add polyglot's styling to the HTML document
            metadata=True,  # include metadata in generated HTML (e.g. title),
            h1=True  # include title as H1 at the top of the doc
        )
        filepath = cleaner.clean()

    # epub and mobi differ only in the requested book format
    for bookFormat in ("epub", "mobi"):
        if not arguments.get(bookFormat):
            continue
        # imported lazily so the ebook machinery is only loaded when needed
        from polyglot import ebook
        book = ebook(
            log=log,
            settings=settings,
            urlOrPath=url or docx,
            title=filenameFlag,
            bookFormat=bookFormat,
            outputDirectory=destinationFolder
        )
        filepath = book.get()

    if arguments.get("kindle"):
        from polyglot import kindle
        sender = kindle(
            log=log,
            settings=settings,
            urlOrPath=url or docx,
            title=filenameFlag
        )
        sender.send()

    if arguments.get("kindleNB2MD"):
        # write `<notebook name>.md` into the destination folder (which has
        # already defaulted to the current working directory)
        basename = os.path.basename(notebook)
        filenameNoExtension = os.path.splitext(basename)[0]
        filepath = os.path.join(destinationFolder, filenameNoExtension + ".md")
        from polyglot.markdown import kindle_notebook
        nb = kindle_notebook(
            log=log,
            kindleExportPath=notebook,
            outputPath=filepath
        )
        nb.convert()

    # only try to open a document if one was actually produced
    if arguments.get("--open") and filepath:
        _open_with_default_app(filepath, log)

    if dbConn:
        dbConn.commit()
        dbConn.close()

    ## FINISH LOGGING ##
    endTime = times.get_now_sql_datetime()
    runningTime = times.calculate_time_difference(startTime, endTime)
    log.info('-- FINISHED ATTEMPT TO RUN THE cl_utils.py AT %s (RUNTIME: %s) --' %
             (endTime, runningTime, ))

    return


def _open_with_default_app(filepath, log):
    """
    *Best-effort attempt to open ``filepath`` with the operating system's default application*

    **Key Arguments:**
        - ``filepath`` -- path to the file to open
        - ``log`` -- logger used to report a failed attempt (assumed to offer the standard ``warning`` method)

    **Return:**
        - None
    """
    try:
        if sys.platform == "win32":
            os.startfile(filepath)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact; output is piped away so the opener
            # cannot clutter the terminal.
            Popen([opener, filepath], stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # opening the document is a convenience -- never fail the run over it
        log.warning('could not open %s: %s' % (filepath, e))
if __name__ == '__main__':
    # run the command-line tool only when executed as a script, not on import
    main()