Savitzky-Golay Smoothing GUI

Simple Smoothing GUI

In an effort to create a set of simple tools that are useful for data processing and realtime analysis of data we’ve been exploring a range of tools.  Granted there are a number of canned solutions in existence (e.g. National Instruments), however, to avoid the long-term challenges of compatibility we are looking for tools that can better serve our research goals.  Two packages that we’ve began to lean more heavily upon include pyqtgraph and guidata.  Both use PyQt4 and are compatible with Pyside for GUI rendering and construction.  Matplotlib is quite mature but it has been our experience that pyqtgraph is quite a bit faster for plotting data in realtime.

The code below integrates pyqtgraph directly into the guidata framework.  This is not a huge stretch as the pyqtgraph widgets integrate directly with the QWidget class in PyQt4.  For those looking for an example the following code illustrate very simply how to integrate one of these plots and update it using simulated data along with the ability to alter the smoothing parameters of the raw data on the fly.  One might envision the use of this approach to capture data from a streaming device (more on that later). It should be noted that the file loading feature has been disabled but it would’t be a huge stretch to re-enable this functionality for single spectra.


# -*- coding: utf-8 -*-
# Adapted from guidata examples:
# Copyright © 2009-2010 CEA
# Pierre Raybaut
# Licensed under the terms of the CECILL License
# (see guidata/__init__.py for details)
# Adapted by Brian Clowers brian.clowers@wsu.edu

"""
DataSetEditGroupBox and DataSetShowGroupBox demo

These group box widgets are intended to be integrated in a GUI application
layout, showing read-only parameter sets or allowing to edit parameter values.
"""

SHOW = True # Show test in GUI-based test launcher

import tempfile, atexit, shutil, datetime, numpy as N

from guidata.qt.QtGui import QMainWindow, QSplitter
from guidata.qt.QtCore import SIGNAL, QTimer
from guidata.qt import QtCore

from guidata.dataset.datatypes import (DataSet, BeginGroup, EndGroup, BeginTabGroup, EndTabGroup)
from guidata.dataset.dataitems import (FloatItem, IntItem, BoolItem, ChoiceItem, MultipleChoiceItem, ImageChoiceItem, FilesOpenItem, StringItem, TextItem, ColorItem, FileSaveItem, FileOpenItem, DirectoryItem, FloatArrayItem, DateItem, DateTimeItem)
from guidata.dataset.qtwidgets import DataSetShowGroupBox, DataSetEditGroupBox
from guidata.configtools import get_icon
from guidata.qthelpers import create_action, add_actions, get_std_icon

# Local test import:
from guidata.tests.activable_dataset import ExampleDataSet

import sys, os
import pyqtgraph as PG

#-----------------------------------
def simpleSmooth(fileName, polyOrder, pointLength, plotSmoothed = False, saveSmoothed = True):
    if not os.path.isfile(fileName):
        return False
    rawArray = get_ascii_data(fileName)
    #savitzky_golay(data, kernel = 11, order = 4)
    smoothArray = savitzky_golay(rawArray, kernel = pointLength, order = polyOrder)
    if plotSmoothed:
        plot_smoothed(smoothArray, rawArray, True)

    if saveSmoothed:
        newFileName = fileName.split(".")[0]
        newFileName+="_smth.csv"
 
    N.savetxt(newFileName, smoothArray, delimiter = ',', fmt = '%.4f')

    return smoothArray

#-----------------------------------

def get_ascii_data(filename):
    data_spectrum=N.loadtxt(filename,delimiter = ',', skiprows=0)##remember to change this depending on file format
    return data_spectrum

#-----------------------------------
def savitzky_golay(data, kernel = 11, order = 4):
 """
 applies a Savitzky-Golay filter
 input parameters:
 - data => data as a 1D numpy array
 - kernel => a positive integer > 2*order giving the kernel size
 - order => order of the polynomal
 returns smoothed data as a numpy array

 invoke like:
 smoothed = savitzky_golay(<rough>, [kernel = value], [order = value]

 From scipy website
 """
 try:
 kernel = abs(int(kernel))
 order = abs(int(order))
 except ValueError, msg:
 raise ValueError("kernel and order have to be of type int (floats will be converted).")
 if kernel % 2 != 1 or kernel < 1:
 raise TypeError("kernel size must be a positive odd number, was: %d" % kernel)
 if kernel < order + 2:
 raise TypeError("kernel is to small for the polynomals\nshould be > order + 2")

 # a second order polynomal has 3 coefficients
 order_range = range(order+1)
 half_window = (kernel -1) // 2
 b = N.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)])
 # since we don't want the derivative, else choose [1] or [2], respectively
 m = N.linalg.pinv(b).A[0]
 window_size = len(m)
 half_window = (window_size-1) // 2

 # precompute the offset values for better performance
 offsets = range(-half_window, half_window+1)
 offset_data = zip(offsets, m)

 smooth_data = list()

 # temporary data, with padded zeros (since we want the same length after smoothing)
 #data = numpy.concatenate((numpy.zeros(half_window), data, numpy.zeros(half_window)))
 # temporary data, with padded first/last values (since we want the same length after smoothing)
 firstval=data[0]
 lastval=data[len(data)-1]
 data = N.concatenate((N.zeros(half_window)+firstval, data, N.zeros(half_window)+lastval))

 for i in range(half_window, len(data) - half_window):
 value = 0.0
 for offset, weight in offset_data:
 value += weight * data[i + offset]
 smooth_data.append(value)
 return N.array(smooth_data)

#-----------------------------------

def first_derivative(y_data):
 """\
 calculates the derivative
 """
 
 y = (y_data[1:]-y_data[:-1])
 
 dy = y/2#((x_data[1:]-x_data[:-1])/2)

 return dy

#-----------------------------------
class SmoothGUI(DataSet):
 """
 Simple Smoother
 A simple application for smoothing a 1D text file at this stage. 
 Follows the KISS principle.
 """
 fname = FileOpenItem("Open file", ("txt", "csv"), "")

 kernel = FloatItem("Smooth Point Length", default=7, min=1, max=101, step=2, slider=True) 
 order = IntItem("Polynomial Order", default=3, min=3, max=17, slider=True)
 saveBool = BoolItem("Save Plot Output", default = True)
 plotBool = BoolItem("Plot Smoothed", default = True).set_pos(col=1)
 #color = ColorItem("Color", default="red")
 
#-----------------------------------
class MainWindow(QMainWindow):
 def __init__(self):
 QMainWindow.__init__(self)
 self.setWindowIcon(get_icon('python.png'))
 self.setWindowTitle("Simple Smoother")
 
 # Instantiate dataset-related widgets:
 self.smoothGB = DataSetEditGroupBox("Smooth Parameters",
 SmoothGUI, comment='')

 self.connect(self.smoothGB, SIGNAL("apply_button_clicked()"),
 self.update_window)

 self.fileName = ''

 self.kernel = 15
 self.order = 3
 self.pw = PG.PlotWidget(name='Plot1')
 self.pw.showGrid(x=True, y = True)

 self.p1 = self.pw.plot()
 self.p1.setPen('g', alpha = 1.0)#Does alpha even do anything?
 self.p2 = self.pw.plot(pen = 'y')
 self.pw.setLabel('left', 'Value', units='V')
 self.pw.setLabel('bottom', 'Time', units='s')

 splitter = QSplitter(QtCore.Qt.Vertical, parent = self)

 splitter.addWidget(self.smoothGB)
 splitter.addWidget(self.pw)
 self.setCentralWidget(splitter)
 self.setContentsMargins(10, 5, 10, 5)
 
 # File menu
 file_menu = self.menuBar().addMenu("File")
 quit_action = create_action(self, "Quit",
 shortcut="Ctrl+Q",
 icon=get_std_icon("DialogCloseButton"),
 tip="Quit application",
 triggered=self.close)
 add_actions(file_menu, (quit_action, ))
 
 ## Start a timer to rapidly update the plot in pw
 self.t = QTimer()
 self.t.timeout.connect(self.updateData)
 self.t.start(1000) 

 def rand(self,n):
 data = N.random.random(n)
 data[int(n*0.1):int(n*0.23)] += .5
 data[int(n*0.18):int(n*0.25)] += 1
 data[int(n*0.1):int(n*0.13)] *= 2.5
 data[int(n*0.18)] *= 2
 data *= 1e-12
 return data, N.arange(n, n+len(data)) / float(n)
 

 def updateData(self):
 yd, xd = self.rand(100)
 ydSmooth = savitzky_golay(yd, kernel = self.kernel, order = self.order)
 
 if self.smoothGB.dataset.plotBool:
 self.p2.setData(y=ydSmooth, x = xd, clear = True)
 self.p1.setData(y=yd*-1, x=xd, clear = True)
 else:
 self.p1.setData(y=yd, x=xd, clear = True)
 self.p2.setData(y=[yd[0]], x = [xd[0]], clear = True)

 if self.smoothGB.dataset.saveBool:
 if os.path.isfile(self.fileName):
 newFileName = self.fileName.split(".")[0]
 
 else:
 newFileName = "test"
 newFileName+="_smth.csv"
 
 N.savetxt(newFileName, ydSmooth, delimiter = ',')#, fmt = '%.4f')


 
 def update_window(self):
 dataset = self.smoothGB.dataset
 self.order = dataset.order
 self.kernel = dataset.kernel
 self.fileName = dataset.fname

 
 
if __name__ == '__main__':
 from guidata.qt.QtGui import QApplication
 app = QApplication(sys.argv)
 window = MainWindow()
 window.show()
 sys.exit(app.exec_())

Gantt Charts in Matplotlib

GanttPlotLove it or hate it, the lack of a tractable options to create Gantt charts warrants frustration at times.  A recent post on Bitbucket provides a nice implementation using matplotlib and python as a platform.  In order to expand the basic functionality a few modifications enable a set of features that highlight the relative contributions of the team participants.  In the example provided above the broad tasks are indicated in yellow while the two inset bars (red:student and blue:PI) illustrate the percent effort.  See the source below for the details.

"""
Creates a simple Gantt chart
Adapted from https://bitbucket.org/DBrent/phd/src/1d1c5444d2ba2ee3918e0dfd5e886eaeeee49eec/visualisation/plot_gantt.py
BHC 2014
"""

import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import matplotlib.dates
from matplotlib.dates import MONTHLY, DateFormatter, rrulewrapper, RRuleLocator

from pylab import *

def create_date(month,year):
"""Creates the date"""

date = dt.datetime(int(year), int(month), 1)
mdate = matplotlib.dates.date2num(date)

return mdate

# Data

pos = arange(0.5,5.5,0.5)

ylabels = []
ylabels.append('Hardware Design & Review')
ylabels.append('Hardware Construction')
ylabels.append('Integrate and Test Laser Source')
ylabels.append('Objective #1')
ylabels.append('Objective #2')
ylabels.append('Present at ASMS')
ylabels.append('Present Data at Gordon Conference')
ylabels.append('Manuscripts and Final Report')

effort = []
effort.append([0.2, 1.0])
effort.append([0.2, 1.0])
effort.append([0.2, 1.0])
effort.append([0.3, 0.75])
effort.append([0.25, 0.75])
effort.append([0.3, 0.75])
effort.append([0.5, 0.5])
effort.append([0.7, 0.4])

customDates = []
customDates.append([create_date(5,2014),create_date(6,2014)])
customDates.append([create_date(6,2014),create_date(8,2014),create_date(8,2014)])
customDates.append([create_date(7,2014),create_date(9,2014),create_date(9,2014)])
customDates.append([create_date(10,2014),create_date(3,2015),create_date(3,2015)])
customDates.append([create_date(2,2015),create_date(6,2015),create_date(6,2015)])
customDates.append([create_date(5,2015),create_date(6,2015),create_date(6,2015)])
customDates.append([create_date(6,2015),create_date(7,2015),create_date(7,2015)])
customDates.append([create_date(4,2015),create_date(8,2015),create_date(8,2015)])

task_dates = {}
for i,task in enumerate(ylabels):
task_dates[task] = customDates[i]
# task_dates['Climatology'] = [create_date(5,2014),create_date(6,2014),create_date(10,2013)]
# task_dates['Structure'] = [create_date(10,2013),create_date(3,2014),create_date(5,2014)]
# task_dates['Impacts'] = [create_date(5,2014),create_date(12,2014),create_date(2,2015)]
# task_dates['Thesis'] = [create_date(2,2015),create_date(5,2015)]

# Initialise plot

fig = plt.figure()
# ax = fig.add_axes([0.15,0.2,0.75,0.3]) #[left,bottom,width,height]
ax = fig.add_subplot(111)

# Plot the data

start_date,end_date = task_dates[ylabels[0]]
ax.barh(0.5, end_date - start_date, left=start_date, height=0.3, align='center', color='blue', alpha = 0.75)
ax.barh(0.45, (end_date - start_date)*effort[0][0], left=start_date, height=0.1, align='center', color='red', alpha = 0.75, label = "PI Effort")
ax.barh(0.55, (end_date - start_date)*effort[0][1], left=start_date, height=0.1, align='center', color='yellow', alpha = 0.75, label = "Student Effort")
for i in range(0,len(ylabels)-1):
labels = ['Analysis','Reporting'] if i == 1 else [None,None]
start_date,mid_date,end_date = task_dates[ylabels[i+1]]
piEffort, studentEffort = effort[i+1]
ax.barh((i*0.5)+1.0, mid_date - start_date, left=start_date, height=0.3, align='center', color='blue', alpha = 0.75)
ax.barh((i*0.5)+1.0-0.05, (mid_date - start_date)*piEffort, left=start_date, height=0.1, align='center', color='red', alpha = 0.75)
ax.barh((i*0.5)+1.0+0.05, (mid_date - start_date)*studentEffort, left=start_date, height=0.1, align='center', color='yellow', alpha = 0.75)
# ax.barh((i*0.5)+1.0, end_date - mid_date, left=mid_date, height=0.3, align='center',label=labels[1], color='yellow')

# Format the y-axis

locsy, labelsy = yticks(pos,ylabels)
plt.setp(labelsy, fontsize = 14)

# Format the x-axis

ax.axis('tight')
ax.set_ylim(ymin = -0.1, ymax = 4.5)
ax.grid(color = 'g', linestyle = ':')

ax.xaxis_date() #Tell matplotlib that these are dates...

rule = rrulewrapper(MONTHLY, interval=1)
loc = RRuleLocator(rule)
formatter = DateFormatter("%b '%y")

ax.xaxis.set_major_locator(loc)
ax.xaxis.set_major_formatter(formatter)
labelsx = ax.get_xticklabels()
plt.setp(labelsx, rotation=30, fontsize=12)

# Format the legend

font = font_manager.FontProperties(size='small')
ax.legend(loc=1,prop=font)

# Finish up
ax.invert_yaxis()
fig.autofmt_xdate()
#plt.savefig('gantt.svg')
plt.show()

XKCD-style Plots in Matplotlib

Now incorporated directly into the latest version of matplotlib (v1.3) here is a great alternative that brings some style to your plotting routines. I haven’t tried it out on plots with a huge number of points but I imagine it should work just fine.  Below are some simple examples.  Simple as matplotlib.pyplot.xkcd()…

Pseudo-Random Sequence with XKCD:

PRS_XKCD

No XKCD:

PRS_noXKCD

Cheers Jake Vanderplas:  http://jakevdp.github.com/blog/2012/10/07/xkcd-style-plots-in-matplotlib/

More Examples:  http://matplotlib.org/xkcd/examples/showcase/xkcd.html