# -*- coding: utf-8 -*-
# Description: Document tree
# Documentation: data_structures.txt
from __future__ import print_function
import re
import os.path
import string
from Remark.DocumentType_Registry import documentType, strictDocumentType
from Remark.FileSystem import unixDirectoryName, unixRelativePath, fileExtension
from Remark.FileSystem import globalOptions, withoutFileExtension
from Remark.FileSystem import pathSuffixSet, globToRegex, directoryExists, findMatchingFiles
from Remark.Reporting import Reporter, ScopeGuard
from Remark.Document import Document
class DocumentTree(object):
def __init__(self, rootDirectory, reporter = Reporter()):
'''
Constructs an empty document-tree using the
given directory as a root-directory.
rootDirectory (string):
The full-path to the directory to use as
root-directory of the document-tree.
See also:
compute()
insertDocument()
'''
# The root-directory whose child-directories
# to construct the document-tree for.
self.rootDirectory = rootDirectory
# Create the orphan document, to which all the
# unlinked documents will be given as children.
self.orphan = Document('orphan.remark-orphan')
self.orphan.setTag('description', ['Orphans'])
# A map from a document's relative-name (string) to
# a document-object (Document).
self.documentMap = {'orphan.remark-orphan' : self.orphan}
# A map from a filename (string) to documents
# with that filename (list of Document's).
self.suffixMap = {}
# A map from a directory (string) to a use-count (integer).
# The keys will contain all the directories in which the documents
# reside, and also all their parent-directories up to the
# document-tree's root-directory. The value will contain the number
# of documents that have generated the directory in the key.
# The use-count allows us to dynamically track the directories
# as documents are inserted and removed.
self.directorySet = {}
# A reporter object (Reporter).
self.reporter = reporter
def __iter__(self):
return iter(self.documentMap.values())
def insertDocument(self, relativeName):
'''
Inserts a document into the document-tree.
relativeName:
The relative path to the document w.r.t. to the
root directory of the document-tree.
'''
document = Document(relativeName)
self.documentMap[document.relativeName] = document
# Add the suffixes of the relative-name into the suffix-map.
suffixSet = pathSuffixSet(relativeName)
for suffix in suffixSet:
if suffix in self.suffixMap:
self.suffixMap[suffix].append(document)
else:
self.suffixMap[suffix] = [document]
# Gather the directory of the file, and the
# parent directories of that directory.
directory = document.relativeDirectory
while True:
if not directory in self.directorySet:
self.directorySet[directory] = 1
else:
self.directorySet[directory] += 1
newDirectory = os.path.normpath(os.path.split(directory)[0])
if newDirectory == directory:
break
directory = newDirectory
return document
def resolveParentLinks(self):
'''
Resolves explicit, implicit, and reference parent links.
Those documents which remain without a parent are linked to
the orphan document.
See also:
insertDocument()
'''
# Resolve explicit links
with ScopeGuard(self.reporter, 'Resolving explicit links'):
self._resolveExplicitLinks()
self.reporter.report([None, 'Done.', None], 'verbose')
# Resolve implicit links
with ScopeGuard(self.reporter, 'Resolving implicit links'):
self._resolveImplicitLinks()
self.reporter.report([None, 'Done.', None], 'verbose')
# Resolve reference links
with ScopeGuard(self.reporter, 'Resolving reference links'):
self._resolveReferenceLinks()
self.reporter.report([None, 'Done.', None], 'verbose')
# Link orphans
#with ScopeGuard(self.reporter, 'Linking orphans'):
self._linkOrphans()
#self.reporter.report([None, 'Done.', None], 'verbose')
def findDocumentLocal(self, filePath, searchDirectory):
'''
Searches for a document corresponding to the given
file path in the given search directory.
filePath:
A relative-path to the file, relative to
the given search-directory.
searchDirectory:
A relative-path to the search-directory, relative to
the document-tree root directory.
returns:
If the document is found, the document object.
Otherwise None.
See also:
findDocumentUpwards().
'''
relativeName = unixDirectoryName(
os.path.join(searchDirectory, filePath))
return self.findDocumentByRelativeName(relativeName)
def findDocumentByRelativeName(self, relativeName):
'''
Searches for a document by relative-name in all of the
document-tree.
relativeName:
The path to the document file, relative to the
document-tree's root directory.
returns:
The document-object if found, None otherwise.
See also:
findDocumentUpwards().
'''
return self.documentMap.get(relativeName)
def findDocumentUpwards(self, filePath, searchDirectory):
'''
Searches for a document corresponding to the given file path
starting from the given search-directory and progressing
to the parent-directories on unsuccessful searches. The search
terminates on the document-tree's root directory.
filePath:
A relative-path to the file to search for, relative to the
current search-directory (which changes during the algorithm).
searchDirectory:
A relative-path to the directory from which to start searching,
relative to the document-tree root-directory.
returns:
The document-object if successful, None otherwise.
'''
while True:
document = self.findDocumentLocal(filePath, searchDirectory)
if document != None:
return document
# Document file was not found, try the parent directory.
parentDirectory = os.path.normpath(os.path.split(searchDirectory)[0])
if parentDirectory == searchDirectory:
# We are at the root, stop here.
break
searchDirectory = parentDirectory
# Document file was not found.
return None
def findDocument(self, filePath, searchDirectory, checkShorter = True):
'''
Searches for a document corresponding to the given file path
first from the given search-directory upwards, and if not
successful, then from all of the document-tree (then the
document can be ambiguous).
filePath:
A relative-path to the file to search for, relative to the
current search-directory (which changes during the algorithm).
searchDirectory:
The directory from which to start searching, relative to
the document-tree root-directory.
returns:
A pair (document, unique), such that 'document' contains the
found document-object (possibly None) and 'unique' is a
boolean that tells whether the choice was unique or not.
If the choise is not unique, the document is picked arbitrarily
over the choices.
'''
document = None
unique = True
documentSet = []
filePath = unixDirectoryName(filePath)
# Search the document by following parent-directories.
document = self.findDocumentUpwards(filePath, searchDirectory)
if document != None:
# The document was found by following parent-directories.
# Add the document to the found-set.
documentSet.append(document)
else:
# The document could be found at any directory
# of the document-tree. Find out if the file-path
# is a path-suffix of any document's relative-name.
documentSet = self.suffixMap.get(filePath, [])
if len(documentSet) == 0:
# There is no such matching path-suffix of a relative-name
# in the document-tree. It may still be that the file-path is
# a relative path which contains .. parent-references.
# Consider that a parent-reference .. has a directory A
# on its left-hand side; then the A and .. can be removed
# without affecting the path (unixDirectoryName does
# this). Therefore what we would have is a kind of
# relative-path which has n parent-references at the
# start, and then normal directories. At this point
# we don't find it useful to search such relative-paths
# over all document-tree directories.
unique = True
elif len(documentSet) == 1:
# There is a unique document whose suffix of the
# relative-name matches the file-path.
document = documentSet[0]
unique = True
if checkShorter:
# Check whether this search could have been equivalently
# done with a shorter path-suffix.
shortestSuffix = ''
n = len(filePath)
i = 0
while i < n:
if filePath[i] == '/':
suffix = filePath[i + 1 : ]
checkDocument, checkUnique = \
self.findDocument(suffix, searchDirectory, False)
if checkDocument == document and checkUnique:
shortestSuffix = suffix
else:
break
i += 1
if shortestSuffix != '':
# A shorter path-suffix would have sufficed.
self.reporter.reportWarning('Used path ' + filePath +
' where a shorter path ' + shortestSuffix +
' would have been unambiguous.',
'redundant-path')
else:
# There are multiple files whose suffix of the
# relative-name matches the file-path.
# Return an arbitrary file.
document = documentSet[0]
unique = False
return (document, unique)
def fullName(self, document):
'''
Returns the join of the document-tree's root directory
and the relative-name of the document.
'''
return os.path.normpath(os.path.join(self.rootDirectory, document.relativeName))
# Private stuff
def _resolveExplicitLinks(self):
'''
Resolves the parent-child relationships between
Document objects in those cases where a document file
explicitly specifies a parent file using a tag.
'''
for document in self.documentMap.values():
# Documents which are not associated to a document
# type are linked to orphan straight away.
if strictDocumentType(document.extension) == None:
document.parent = self.orphan
# See if the document specifies a parent document.
if 'parent' in document.tagSet:
# The parent file path in the tag is given relative to
# the directory containing the document file.
parentName = document.tagString('parent').strip()
# See if we can find the parent document.
parent, unique = self.findDocument(parentName,
document.relativeDirectory)
if not unique:
self.reporter.reportWarning('Parent "' + parentName + '" is ambiguous for ' +
document.relativeName + '.',
'ambiguous-parent')
if parent == None:
if parentName != '':
# If a parent file can't be found, it can be
# because of a typo in the tag or a missing file.
# In any case we warn the user.
self.reporter.reportWarning('Parent "' + parentName + '" was not found for ' +
document.relativeName + '.',
'missing-parent')
else:
# Parent file was found. Update parent-child pointers.
parent.insertChild(document)
elif 'parentOf' in document.tagSet:
# This file uses a reference link, which will be
# handled later.
None
elif document.documentType.name() == 'RemarkPage':
# This file is a documentation file and it is
# missing a parent tag. Warn about that.
self.reporter.reportWarning(document.relativeName +
' does not specify a parent file.',
'unspecified-parent')
def _resolveImplicitLinks(self):
'''
Resolves the parent-child relationships between
Document objects in those cases where no
parent file has been specified explicitly.
'''
# An implicit parent file can be obtained in two
# ways:
# 1) As a documentation file (.txt) whose filename
# without the suffix is a prefix of the document's
# filename.
# 2) As the parent of a source file whose filename
# without the suffix is a prefix of the document's
# filename _and_ whose parent has been specified
# explicitly.
#
# In this process, lengthier prefixes are favored
# over shorter ones. Given prefixes of the same length,
# documentation files (.txt) are favored over source
# files. The search is restricted to the directory
# containing the document. The implicit linking only
# concerns source files. Documentation files (.txt)
# without an explicit parent emit a warning.
# Place the documents in an array sorted
# by their relative file paths. This allows
# us to find the prefix files. In addition
# shorter filenames are listed before longer ones.
sortedMap = []
for document in self.documentMap.items():
sortedMap.append(document[1])
sortedMap.sort(key = lambda x: x.relativeName)
#for document in sortedMap:
# print(document.fileName)
for i in range(0, len(sortedMap)):
document = sortedMap[i]
# The implicit linking is done only when
# a file does not have a parent and
# does not use reference linking.
if document.parent != None or 'parentOf' in document.tagSet:
continue
self.reporter.report(
'Implicit linking for ' + document.relativeName + ' ...',
'debug-implicit')
searchDirectory = document.relativeDirectory
searchName = withoutFileExtension(document.fileName)
# The implicit linking only concerns
# source files.
if document.tagString('document_type') != 'RemarkPage':
# Find the last document in the array
# which has identical filename to the searched
# one, without considering suffixes.
# After doing this, all the candidates are
# located at lower indices, and they will
# come in shortening order.
lastIndex = i + 1
while lastIndex < len(sortedMap):
thatDocument = sortedMap[lastIndex]
thatDirectory = thatDocument.relativeDirectory
# The search is restricted to the containing
# directory.
if thatDirectory != searchDirectory:
break
thatName = withoutFileExtension(thatDocument.fileName)
if thatName != searchName:
break
lastIndex = lastIndex + 1
# Visit the documents in the array in decreasing order
# (decreasing prefix length).
sourceMatches = set()
documentationMatches = set()
matchLength = 0
matchesFound = 0;
k = lastIndex - 1;
while k >= 0:
# The document itself is not accepted as
# its own parent.
if k != i:
thatDocument = sortedMap[k]
thatDirectory = thatDocument.relativeDirectory
# The search is restricted to the containing
# directory.
if thatDirectory != searchDirectory:
break
# The filename of the parent document must be a
# prefix of the documents filename, without
# suffixes.
thatName = withoutFileExtension(thatDocument.fileName)
if searchName.startswith(thatName):
# If we have already found a match,
# and we find a candidate which
# is shorter than our existing match,
# we can stop the search.
if matchesFound > 0 and len(thatName) < matchLength:
break
# Either the file must be a documentation file
# or it must have a parent.
#print('Candidate', thatDocument.relativeName)
if thatDocument.tagString('document_type') == 'RemarkPage':
# We have found a match from a documentation
# file!
documentationMatches.add(thatDocument)
matchesFound = matchesFound + 1
matchLength = len(thatName)
# In this case we are safe to stop because
# there can't be another documentation file
# of the same length, and documentation files
# are favored over source files.
break
elif thatDocument.parent != None:
# We have found a match from a source file!
sourceMatches.add(thatDocument.parent)
matchesFound = matchesFound + 1
matchLength = len(thatName)
# We can't stop the search yet.
# We still have to see what other choices
# there are. For example, it could be
# that there is matching documentation file
# of the same length coming up.
# Proceed to the next document in the array.
k = k - 1
# See if we found an implicit match.
if len(documentationMatches) > 0:
# There is a match from a documentation file.
# Documentation files are favored to source files
newParent = documentationMatches.pop()
newParent.insertChild(document)
elif len(sourceMatches) >= 1:
# There is a match from a source file.
if len(sourceMatches) > 1:
message = ['Ambiguous parent for ' +
document.relativeName +
'. Picking arbitrarily from:']
for match in sourceMatches:
message.append(match.relativeName)
self.reporter.reportWarning(message, 'ambiguous-parent')
newParent = sourceMatches.pop()
newParent.insertChild(document)
if document.parent != None:
self.reporter.report(
'Found ' + document.parent.relativeName + '.',
'debug-implicit')
def _resolveReferenceLinks(self):
'''
Resolves the parent-child relationships between
Document objects in those cases where a document file
specifies a parent file as the parent file of
another file.
'''
for document in self.documentMap.values():
# Reference linking is done only when no parent has been
# found through explicit or implicit linking, and
# a reference link has been specified.
if document.parent != None or not ('parentOf' in document.tagSet):
continue
# The parent file path in the tag is given relative to
# the directory containing the document file.
referenceName = document.tagString('parentOf')
reference, unique = self.findDocument(referenceName,
document.relativeDirectory)
if reference == None:
# If a reference file can't be found, it can be
# because of a typo in the tag or a missing file.
# In any case we warn the user.
self.reporter.reportWarning('Reference parent file was not found for ' +
document.relativeName + '. The search was for: ' +
referenceName,
'missing-parent')
else:
# Reference file was found. Use
# its parent as the parent of this document.
if reference.parent != None:
reference.parent.insertChild(document)
else:
self.reporter.reportWarning(referenceName + ', referenced by ' +
document.relativeName +
', does not have an associated parent file.',
'unspecified-parent')
def _linkOrphans(self):
'''
Links all documents without a parent
as children of the root document. This
turns the graph induced by parent-child
relationships into a tree.
'''
for document in self.documentMap.values():
if document.parent == None:
self.orphan.insertChild(document)
def createDocumentTree(argumentSet, reporter):
'''
Inserts all matching files in the directory into the document tree.
A file matches if it matches an include glob, and does not match
an exclude glob.
Also creates a directory.index document to each directory.
argumentSet (ArgumentSet):
A structure object containing the following
fields.
ArgumentSet
-----------
inputDirectory (string):
The directory from which to insert files into the document tree.
outputDirectory (string):
The directory to which to converted files will be placed.
includeSet (iterable of glob strings):
A set of glob strings each defining a set of
relative-names of files to include. These names are
relative to the include directory.
excludeSet (iterable of glob strings):
A set of glob strings each defining a set of
relative-names of files to exclude. These names are
relative to the include directory. Exclusion has
priority over inclusion.
disableSet (iterable of strings):
A set of warning-types that should not be reported.
maxTagLines (integer):
The maximum number of lines to scan for tags.
maxFileSize (integer):
The maximum file size to convert. This limit is
useful when a directory contains a large matching
file which actually shouldn't be part of the
documentation.
strict (boolean):
Whether warnings should be treated as errors.
verbose (boolean):
Whether additional information should be reported.
extensions (boolean):
Whether to report the extensions in the input directory.
returns (DocumentTree):
The created document tree.
'''
if not directoryExists(argumentSet.inputDirectory):
reporter.reportError('Input directory ' + argumentSet.inputDirectory + ' does not exist.',
'invalid-input')
return None
# Construct an empty document-tree for the input directory.
documentTree = DocumentTree(argumentSet.inputDirectory, reporter)
# Find the matching files.
relativeNameSet = findMatchingFiles(argumentSet.inputDirectory,
argumentSet.includeSet, argumentSet.excludeSet)
# Add the matching files into the document tree.
for relativeName in relativeNameSet:
documentTree.insertDocument(relativeName)
# Generate a directory.remark-index virtual document to each
# directory. This provides the directory listings.
for directory in documentTree.directorySet:
# Form the relative name of the document.
relativeName = os.path.join(directory, 'directory.remark-index')
# Insert a document with that relative name.
document = documentTree.insertDocument(relativeName)
# Give the document the description from the unix-style
# directory name combined with a '/' to differentiate
# visually that it is a directory.
description = unixDirectoryName(document.relativeDirectory) + '/'
# Add the description to the document.
document.setTag('description', [description])
return documentTree