#!/usr/bin/env python
# Import MS Project XML File
# Copyright 2006 by Brian C. Christensen

#    This file is part of GanttPV.
#
#    GanttPV is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    GanttPV is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with GanttPV; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# 060529 - started work on this script
# 060601 - successful import of task, dependency, resource, & assignment data from xml file
# 060602 - keeps task hierarchy
# 060603 - uses the first MS Project calendar for the normal workweek
#          added task and assignment effort (when not equal defaults)
# 060604 - update to logic that eliminates redundant assignment and task effort hours values

import re
from xml.dom.minidom import parse, parseString
# from xml import xpath

# NOTE(review): `Data`, `wx` and `self` are neither imported nor defined in
# this file; presumably the GanttPV script runner supplies them in the global
# namespace the script is executed in -- confirm against the host.

debug = 1  # when true, progress messages are printed while importing

# Leading whole-hour part of a duration such as "PT8H0M0S"; minutes and
# seconds are ignored.
_HOURS_PATTERN = re.compile(r"PT(\d+)H")

# Convert an MS Project day number (1..7) to the GanttPV week-list index.
# Entry 0 is a placeholder: day type 0 is not a weekday and is never looked up.
_DAY_INDEX = [0, 6, 0, 1, 2, 3, 4, 5]


def _log(message):
    """Print message when the module-level debug flag is set."""
    if debug:
        # single parenthesised argument: parses as a print statement in
        # Python 2 and as a function call in Python 3
        print(message)


def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist."""
    return "".join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])


def _first_text(element, tag):
    """Return the text of the first `tag` descendant of element, or None."""
    found = element.getElementsByTagName(tag)
    if not found:
        return None
    return getText(found[0].childNodes)


def _child_text(parent, tag):
    """Return the text of the first direct `tag` child of parent, or None."""
    for node in parent.childNodes:
        if node.nodeType == node.ELEMENT_NODE and node.tagName == tag:
            return getText(node.childNodes)
    return None


def _children(element, container_tag, item_tag):
    """Return the `item_tag` elements inside the first `container_tag`, or []."""
    containers = element.getElementsByTagName(container_tag)
    if not containers:
        return []
    return containers[0].getElementsByTagName(item_tag)


def _parse_hours(text):
    """Return the whole hours of a "PT<n>H..." duration, or None if unparsable."""
    match = _HOURS_PATTERN.match(text)
    if match is None:
        return None
    return int(match.group(1))


def _hours_from(element, tag, raw_label, parsed_label):
    """Read the `tag` duration under element, log it, and return hours or None."""
    raw = _first_text(element, tag)
    if raw is None:
        return None
    hours = _parse_hours(raw)
    _log("%s: %s" % (raw_label, raw))
    _log("%s: %s" % (parsed_label, hours))
    return hours


def _import_project(dom):
    """Create the Project record from the root element; return its ID."""
    root = dom.documentElement
    # Only direct children of the root are consulted, so a calendar or task
    # <Name> is never mistaken for the project name.
    name = _child_text(root, "Name")
    if name is not None:
        # drop a trailing 3-letter file extension such as ".xml", if present
        if name[-4:-3] == '.':
            name = name[:-4]
        _log("Name: %s" % name)

    startdate = None
    stamp = _child_text(root, "StartDate")
    if stamp is not None:
        startdate = stamp.split("T")[0]  # keep the date, drop the time part
        _log("Start Date is: %s" % startdate)

    change = {'Table': 'Project', 'Name': name, 'StartDate': startdate,
              'Imported': 'MS Project'}
    return Data.Update(change)['ID']


def _import_calendar(dom):
    """Store the weekly working hours of the calendar with UID 1."""
    workweek = [0] * 7  # hours per day, indexed by GanttPV day
    for cal in _children(dom, "Calendars", "Calendar"):
        caluid = _first_text(cal, "UID")
        if caluid is None:
            continue  # must always be present
        _log("Calendar UID: %s" % caluid)
        if caluid != '1':
            continue  # only look at first calendar?

        for day in _children(cal, "WeekDays", "WeekDay"):
            try:
                daytype = int(_first_text(day, "DayType") or 0)
            except ValueError:
                daytype = 0
            _log("Day Type: %d" % daytype)
            # Day type 0 marks an exception entry rather than a weekday;
            # counting it would inflate the hours stored at index 0.
            if daytype < 1 or daytype > 7:
                continue

            for work in _children(day, "WorkingTimes", "WorkingTime"):
                fromtime = _first_text(work, "FromTime")
                totime = _first_text(work, "ToTime")
                if fromtime and totime:
                    # whole hours only: the minute and second parts are ignored
                    hours = int(totime[:2]) - int(fromtime[:2])
                    _log("hours added for %d = %d" % (daytype - 1, hours))
                    workweek[_DAY_INDEX[daytype]] += hours

    # Do not write an all-zero work week (e.g. when calendar 1 is missing).
    if [h for h in workweek if h]:
        Data.Update({'Table': 'OtherData', 'ID': 1, 'WeekHours': workweek})
    else:
        _log("No working hours found in calendar 1; WeekHours left unchanged")


def _import_tasks(dom, projectid, get_id):
    """Create Task records (keeping the outline hierarchy) and Dependency records."""
    parents = {}  # outline number -> GanttPV task ID, used to find parents
    for task in _children(dom, "Tasks", "Task"):
        tuid = _first_text(task, "UID")
        if tuid is None:
            continue  # must always be present
        _log("UID: %s" % tuid)
        if tuid == '0':
            continue  # appears to be a dummy task

        onum = _first_text(task, "OutlineNumber")
        olev = _first_text(task, "OutlineLevel")

        tname = _first_text(task, "Name")
        if tname is None:
            tname = ""
        else:
            _log("Name: %s" % tname)

        hours = _hours_from(task, "Duration", "Raw Duration", "Duration")
        # Work -> effort
        # This should be omitted if it matches "everyone full time"
        effort = _hours_from(task, "Work", "Raw work", "Effort")

        tstart = None
        constraint = _first_text(task, "ConstraintDate")
        if constraint is not None:
            tstart = constraint.split("T")[0]
            _log("Constraint Date: %s" % tstart)
        # 'Start' is the equivalent of 'CalculatedStartDate', so it is not imported

        tid = get_id(tuid, 'Task')
        parents[onum] = tid

        # if level > 1 then the parent has this outline number w/ last part removed
        parentid = None
        if onum and olev and int(olev) > 1:
            ponum = onum[:onum.rfind('.')]
            _log("onum %s -> parent onum %s" % (onum, ponum))
            parentid = parents.get(ponum)

        change = {'Table': 'Task', 'ID': tid, 'ProjectID': projectid,
                  'TaskID': parentid, 'Name': tname, 'DurationHours': hours,
                  'EffortHours': effort, 'StartDate': tstart}
        Data.Update(change)

        for pred in task.getElementsByTagName("PredecessorLink"):
            puid = _first_text(pred, "PredecessorUID")
            if puid is None:
                continue
            _log("PredecessorUID: %s" % puid)
            change = {'Table': 'Dependency', 'TaskID': tid,
                      'PrerequisiteID': get_id(puid, 'Task')}
            Data.Update(change)


def _import_resources(dom, projectid, get_id):
    """Create a Resource record for every resource except UID 0."""
    for res in _children(dom, "Resources", "Resource"):
        ruid = _first_text(res, "UID")
        if ruid is None:
            continue
        _log("UID: %s" % ruid)
        if ruid == '0':
            continue  # ms project has an extra resource record

        rname = _first_text(res, "Name")
        if rname is not None:
            _log("Name: %s" % rname)

        change = {'Table': 'Resource', 'ID': get_id(ruid, 'Resource'),
                  'ProjectID': projectid, 'Name': rname}
        Data.Update(change)


def _import_assignments(dom, projectid, get_id):
    """Create an Assignment record for every task/resource assignment."""
    # The loop variable must not be called `as`: that is a reserved word
    # from Python 2.6 on.
    for assignment in _children(dom, "Assignments", "Assignment"):
        auid = _first_text(assignment, "UID")
        if auid is None:
            continue
        _log("UID: %s" % auid)
        if auid == '0':
            continue

        tuid = _first_text(assignment, "TaskUID")
        if tuid is None:
            continue
        _log("TaskUID: %s" % tuid)

        ruid = _first_text(assignment, "ResourceUID")
        if ruid is None:
            continue
        _log("ResourceUID: %s" % ruid)
        # if resource uid is < 0 it means no resource is assigned
        if ruid.startswith('-'):
            continue

        # Work -> effort
        # This should be omitted if it matches "everyone full time"
        effort = _hours_from(assignment, "Work", "Raw work", "Effort")

        # if assignment effort = task duration then omit it here???
        change = {'Table': 'Assignment', 'ProjectID': projectid,
                  'TaskID': get_id(tuid, 'Task'),
                  'ResourceID': get_id(ruid, 'Resource'),
                  'EffortHours': effort}
        Data.Update(change)


def _clear_redundant_effort(projectid):
    """Blank task and assignment EffortHours values that match the defaults."""
    dt = Data.Database['Task']
    da = Data.Database['Assignment']
    tasks = Data.SearchByColumn(dt, {'ProjectID': projectid})
    for kt, vt in tasks.iteritems():
        assignments = Data.SearchByColumn(da, {'TaskID': kt})
        count = len(assignments)
        dhours = vt.get('DurationHours') or 0
        ehours = vt.get('EffortHours') or 0
        if dhours * count == ehours:  # all full time people
            Data.Update({'Table': 'Task', 'ID': kt, 'EffortHours': None})
            ehours = 0
        for ka, va in assignments.iteritems():
            ahours = va.get('EffortHours') or 0
            # redundant when it equals the task duration (no task effort
            # kept) or an even share of the task effort
            if ((ehours == 0 and ahours == dhours)
                    or (ehours and ahours == ehours // count)):
                Data.Update({'Table': 'Assignment', 'ID': ka,
                             'EffortHours': None})


def Import(path):
    """Import the MS Project XML file at path as a new project.

    Creates a Project record, stores the working hours of calendar 1, then
    creates Task, Dependency, Resource and Assignment records, clears
    redundant effort values and registers an undo step.  All records are
    written through the externally supplied `Data` object.

    Any exception raised while importing propagates to the caller; the
    parsed DOM is released in every case.
    """
    xref = {}  # convert between MS Project IDs and GanttPV IDs

    def GetID(UID, table):
        '''UID is from MS Project
        table is the GanttPV table name
        returns GanttPV ID of record (creating the record on first use)
        '''
        key = (UID, table)
        if key not in xref:
            xref[key] = Data.Update({'Table': table})['ID']
        return xref[key]

    dom1 = parse(path)  # parse an XML file by name
    try:
        projectid = _import_project(dom1)
        _import_calendar(dom1)
        _import_tasks(dom1, projectid, GetID)
        _import_resources(dom1, projectid, GetID)
        _import_assignments(dom1, projectid, GetID)
        _clear_redundant_effort(projectid)
        Data.SetUndo("Import MS Project XML File")
    finally:
        # mandatory - to make sure memory used by dom1 is released
        dom1.unlink()


def DoImport():
    """Ask the user for an MS Project XML file and import it if one is chosen."""
    wildcardx = "XML (*.xml)|*.xml|" "All files (*.*)|*.*"
    dlg = wx.FileDialog(
        self, message="Open MS Project XML File ...",
        defaultFile="", wildcard=wildcardx, style=wx.OPEN
        )
    try:
        dlg.SetFilterIndex(0)
        if dlg.ShowModal() == wx.ID_OK:
            Import(dlg.GetPath())
    finally:
        # destroy the dialog even when the import raises
        dlg.Destroy()


DoImport()