Source code for ccsgp_get_started.examples.gp_datdir

import logging, argparse, os, sys
import numpy as np
from collections import OrderedDict
from ..ccsgp.ccsgp import make_plot
from .utils import getWorkDirs
from ..ccsgp.utils import getOpts

[docs]def gp_datdir(initial, topN):
  """example for plotting from a text file via numpy.loadtxt

  1. prepare input/output directories
  2. load the data into an OrderedDict() [adjust axes units]
  3. sort countries from highest to lowest population
  4. select the <topN> most populated countries
  5. call ccsgp.make_plot with data from 4

  Below is an output image for country initial T and the 4 most populated
  countries for this initial (click to enlarge). Also see::

    $ python -m ccsgp_get_started.examples.gp_datdir -h

  for help on the command line options.

  .. image:: pics/T.png
     :width: 450 px

  .. image:: pics/U.png
     :width: 450 px

  :param initial: country initial
  :type initial: str
  :param topN: number of most populated countries to plot
  :type topN: int
  :ivar inDir: input directory according to package structure and initial
  :ivar outDir: output directory according to package structure
  :ivar data: OrderedDict with datasets to plot as separate keys
  :ivar file: data input file for specific country, format: [x y] OR [x y dx dy]
  :ivar country: country, filename stem of input file
  :ivar file_url: absolute url to input file
  :ivar nSets: number of datasets
  """
  # prepare input/output directories
  inDir, outDir = getWorkDirs()
  initial = initial.capitalize()
  inDir = os.path.join(inDir, initial)
  if not os.path.exists(inDir): # catch missing initial
    return "initial %s doesn't exist" % initial
  # prepare data
  data = OrderedDict()
  for file in os.listdir(inDir):
    country = os.path.splitext(file)[0]
    file_url = os.path.join(inDir, file)
    data[country] = np.loadtxt(open(file_url, 'rb')) # load data
    # set y-axis unit to 1M
    data[country][:, 1] /= 1e6
    if data[country].shape[1] > 2: data[country][:, 3:] /= 1e6
  logging.debug(data) # shown if --log flag given on command line
  # sort countries according to mean population (highest -> lowest)
  sorted_data = OrderedDict(sorted(
    data.items(), key = lambda t: np.mean(t[1][:,1]), reverse = True
  ))
  # "pop" (select) N most populated countries
  top_data = OrderedDict(
    sorted_data.popitem(last = False) for i in xrange(topN)
    if sorted_data
  )
  # generate plot using ccsgp.make_plot
  nSets = len(top_data)
  make_plot(
    data = top_data.values(),
    properties = [ getOpts(i) for i in xrange(nSets) ],
    titles = top_data.keys(), # use data keys as legend titles
    name = os.path.join(outDir, initial),
    key = [ 'at graph 1., 1.2', 'maxrows 2' ],
    ylabel = 'total population ({/Symbol \664} 10^{6})',
    xlabel = 'year', rmargin = 0.99, tmargin = 0.85, size='8.5in,8in'
  )
  return 'done'

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument("initial", help="country initial = input subdir with txt files")
  parser.add_argument("topN", help="number of most populated countries to plot")
  parser.add_argument("--log", help="show log output", action="store_true")
  args = parser.parse_args()
  loglevel = 'DEBUG' if args.log else 'WARNING'
  logging.basicConfig(
    format='%(message)s', level=getattr(logging, loglevel)
  )
  print gp_datdir(args.initial, int(args.topN))