#! /usr/bin/env python
'''
A utilities library for various io/data aggregation tasks
'''
from __future__ import division, with_statement, print_function
from itertools import *
import os
import re
from collections import *
import subprocess
from subprocess import PIPE
import scipy
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from sklearn.metrics import classification_report
from core.image_scanner import ImageScanner
import PIL
import cv2
# from IPython import display
# ------------------------------------------------------------------------------
def get_report(y_true, y_pred):
    '''
    returns a classification report as a DataFrame, rather than as text

    The text produced by sklearn's classification_report is parsed line by
    line: every non-empty line is split on runs of spaces, the first parsed
    line supplies the column names and the first cell of each remaining line
    becomes the index. All values are kept as the strings found in the text.

    Args:
        y_true (array-like):
            list of true labels

        y_pred (array-like):
            list of predicted labels

    Returns:
        classification report: DataFrame
    '''
    text = classification_report(y_true, y_pred)

    # collapse the summary label into one word so it stays a single cell
    text = re.sub('avg / total', 'total', text)

    # build real lists (not lazy map/filter objects) so the rows can be
    # sliced and indexed below on Python 3 as well as Python 2
    rows = [
        [cell for cell in re.split(' +', line) if cell != '']
        for line in text.split('\n')
    ]
    rows = [row for row in rows if row != []]

    # NOTE: any row label that itself contains spaces is split into several
    # cells and will no longer line up with the header row
    report = DataFrame(rows[1:])
    report.set_index(0, inplace=True)
    report.columns = rows[0]
    return report
# ------------------------------------------------------------------------------
def pil_to_opencv(image):
    '''
    converts a PIL.Image into a cv2 image

    Args:
        image (PIL.Image):
            pillow image

    Returns:
        cv2: opencv image
            the pixel array of the image after an RGB to BGR conversion
    '''
    rgb_pixels = np.array(image)
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels
def opencv_to_pil(image):
    '''
    converts cv2 image into PIL.Image

    No channel reordering is performed: the array is handed to pillow as is,
    so the channels of the result are in the same order as the input.

    Args:
        image (cv2 image):
            cv2 image

    Returns:
        PIL.Image: pillow image
            object is in BGR color space (when the input is a BGR cv2 image)
    '''
    # a bare `import PIL` does not load the Image submodule, so import it
    # explicitly instead of relying on another module having done so
    from PIL import Image

    return Image.fromarray(image)
def generate_samples(image, label, params):
    '''
    convenience function for generating labeled samples from a provided image

    An ImageScanner is built from the image and params, then the scanner
    method named by params['scan_method'] is called with the same params.

    Args:
        image (PIL.Image):
            pillow image

        label (str):
            image label

        params (dict):
            params to provide to ImageScanner and to its scan method
            must contain a 'scan_method' key naming the method to call

    Returns:
        list: one [patch, label, params] row per item yielded by the scan method
    '''
    scanner = ImageScanner(image, **params)
    scan = getattr(scanner, params['scan_method'])

    samples = []
    for patch in scan(**params):
        samples.append([patch, label, params])
    return samples
def get_channel_histogram(image, channel, bins=256, normalize=False, **kwargs):
    '''
    generates frequency data for a given channel of a provided image

    Args:
        image (cv2 image):
            opencv image to be processed

        channel (str):
            color channel to be processed
            acceptable values: r, g, b, h, s, v

        bins opt(int):
            number of bins to split histogram into
            default: 256 (number of channel values for sRGB images)

        normalize opt(bool):
            normalize histogram data
            default: False

        **kwargs:
            accepted for call compatibility and ignored

    Returns:
        numpy.array: raveled array

    Raises:
        KeyError: if channel is not one of the acceptable values
    '''
    # channel letter -> channel index within the image array
    # (r/g/b indices follow BGR ordering, h/s/v follow HSV ordering)
    lut = {
        'r': 2, 'g': 1, 'b': 0,
        'h': 0, 's': 1, 'v': 2
    }
    output = cv2.calcHist([image], [lut[channel]], None, [bins], [0, 256])
    if normalize:
        # the destination array is passed explicitly because newer OpenCV
        # python bindings require it; the histogram is normalized in place
        output = cv2.normalize(output, output)
    return output.ravel()
def create_histogram_stats(data, chan_data, channel):
    '''
    convenience function for appending statistics based upon provided histogram data to data

    Each statistic is computed with DataFrame.apply over chan_data and stored
    in data (in place) as a new column named <channel>_<statistic>, for the
    statistics mean, max, argmax, std, skew and kurt.

    Args:
        data (DataFrame): data to be appended to
        chan_data (DataFrame): channel histogram data
        channel (str): name of channel

    Returns:
        None: None
    '''
    # `import scipy` alone does not guarantee that the stats subpackage is
    # loaded, so import it explicitly rather than relying on side effects
    from scipy import stats

    statistics = [
        ('mean', lambda hist: hist.mean()),
        ('max', lambda hist: hist.max()),
        ('argmax', lambda hist: np.argmax(hist)),
        ('std', lambda hist: hist.std()),
        ('skew', lambda hist: stats.skew(hist)),
        ('kurt', lambda hist: stats.kurtosis(hist)),
    ]
    for name, func in statistics:
        data[channel + '_' + name] = chan_data.apply(func)
# ------------------------------------------------------------------------------
def get_histograms(image, bins=256, normalize=False, colorspace='rgb'):
    '''
    generates histogram data for each channel of an image

    Every character of colorspace is treated as a channel name and passed to
    get_channel_histogram.

    Args:
        image (cv2 image):
            opencv image to be processed

        bins opt(int):
            number of bins to split histogram into
            default: 256 (number of channel values for sRGB images)

        normalize opt(bool):
            normalize histogram data
            default: False

        colorspace opt(str):
            colorspace of provided image
            acceptable values: 'rgb', 'hsv'
            default: 'rgb'

    Returns:
        dict: dict of channel histograms, keyed by channel letter
    '''
    histograms = {}
    for chan in colorspace:
        histograms[chan] = get_channel_histogram(
            image, chan, bins=bins, normalize=normalize
        )
    return histograms
# ------------------------------------------------------------------------------
def plot_channel_histogram(image, channel, bins=256, normalize=False):
    '''
    plots a histogram of a single channel of a provided image

    Args:
        image (cv2 image):
            opencv image to be processed

        channel (str): color channel

        bins opt(int):
            number of bins to split histogram into
            default: 256 (number of channel values for sRGB images)

        normalize opt(bool):
            normalize histogram data
            default: False

    Returns:
        None: None
    '''
    # plot color per channel: r, g and b use their own color, h, s and v use 'w'
    plot_colors = {
        'r': 'r', 'g': 'g', 'b': 'b',
        'h': 'w', 's': 'w', 'v': 'w'
    }
    frequencies = get_channel_histogram(
        image, channel, bins=bins, normalize=normalize
    )
    line_color = plot_colors[channel]
    Series(frequencies).plot(color=line_color)
def plot_histograms(image, bins=256, normalize=False):
    '''
    plots a histogram of all channels of a provided image

    Histograms are generated with get_histograms using its default
    colorspace, and each one is plotted using its channel name as the color.

    Args:
        image (cv2 image):
            opencv image to be processed

        bins opt(int):
            number of bins to split histogram into
            default: 256 (number of channel values for sRGB images)

        normalize opt(bool):
            normalize histogram data
            default: False

    Returns:
        None: None
    '''
    histograms = get_histograms(image, bins=bins, normalize=normalize)

    # get_histograms maps channel name -> histogram, so the key is the plot
    # color and the value is the data; items() works on Python 2 and 3
    for channel, hist in histograms.items():
        Series(hist).plot(color=channel)
# ------------------------------------------------------------------------------
def execute_python_subshells(script, iterable):
    '''
    a simple hacky workaround for multiprocessing's bugginess
    executes a new python subshell per item

    Each command is started in the background through the shell and is not
    waited on; its output is discarded.

    Args:
        script (str): fullpath of python script to run (check /bin)
        iterable (iter): list of arguments to provide each call
            each item is itself an iterable of strings

    Returns:
        None: None
    '''
    # The Popen objects are discarded, so nothing would ever read from a PIPE.
    # Send output to the null device instead, so a child can neither block on
    # a full pipe nor fail on a closed one.
    with open(os.devnull, 'wb') as devnull:
        for item in iterable:
            # NOTE: arguments are joined unquoted and interpreted by the
            # shell; only call this with trusted values
            cmd = ' '.join([script, ' '.join(item), '2>/dev/null &'])
            subprocess.Popen(cmd, shell=True, stdout=devnull, stderr=devnull)
# ------------------------------------------------------------------------------
# def show_image(response):
# if not re.search('endgrain|database', response['snippet'], flags=re.IGNORECASE):
# print(response['snippet'], response['displayLink'])
# img = display.Image(url=r['link'], width=300, height=300)
# display.display(img)
# def display_results(response):
# for item in response:
# print(item['snippet'], item['displayLink'])
# img = display.Image(url=item['link'], width=300, height=300)
# display.display(img)
# ------------------------------------------------------------------------------
# names exported by `from <module> import *`; main is not listed
__all__ = [
    'get_report',
    'pil_to_opencv',
    'opencv_to_pil',
    'generate_samples',
    'get_channel_histogram',
    'create_histogram_stats',
    'get_histograms',
    'plot_channel_histogram',
    'plot_histograms',
    'execute_python_subshells'
    # the entries below belong to functions that are commented out above
    # 'show_image',
    # 'display_results'
]
def main():
    # no-op placeholder; deliberately left without a docstring so that the
    # output of help(main) below is unchanged
    return None


if __name__ == '__main__':
    # running this file directly only displays the help page for main
    help(main)