Mercurial
view hgext/convert/subversion.py @ 10207:f5e55f1ca927
Merge with stable
| author | Martin Geisler <mg@lazybytes.net> |
|---|---|
| date | Mon, 04 Jan 2010 01:11:18 +0100 |
| parents | a02d43acbc04 |
| children | 08a0f04b56bd |
line source
1 # Subversion 1.4/1.5 Python API backend
2 #
3 # Copyright(C) 2007 Daniel Holth et al
5 import os
6 import re
7 import sys
8 import cPickle as pickle
9 import tempfile
10 import urllib
11 import urllib2
13 from mercurial import strutil, util, encoding
14 from mercurial.i18n import _
16 # Subversion stuff. Works best with very recent Python SVN bindings
17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 # these bindings.
20 from cStringIO import StringIO
22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 from common import commandline, converter_source, converter_sink, mapfile
25 try:
26 from svn.core import SubversionException, Pool
27 import svn
28 import svn.client
29 import svn.core
30 import svn.ra
31 import svn.delta
32 import transport
33 import warnings
34 warnings.filterwarnings('ignore',
35 module='svn.core',
36 category=DeprecationWarning)
38 except ImportError:
39 pass
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
def geturl(path):
    """Return a Subversion URL for path.

    The svn client is asked first for the URL of the canonicalized path.
    If that raises SubversionException and path is an existing local
    directory, a file:// URL is built from its absolute path. Any other
    value is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
def optrev(number):
    """Return an svn_opt_revision_t designating revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
class changedpath(object):
    """Plain copy of the copy/action fields of a changed-path entry.

    Only copyfrom_path, copyfrom_rev and action are kept; any other
    attribute of the source object is dropped.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Write the svn log of paths under url to fp as a pickle stream.

    Each log entry is dumped as a tuple
    (orig_paths, revnum, author, date, message). The stream is terminated
    by a pickled None on success or when an IOError interrupts the
    transfer, and by the pickled Subversion error number when a
    SubversionException is raised. The function never returns: it closes
    fp and terminates the process with os._exit(0).
    """
    # -1 selects the highest pickle protocol available.
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        if orig_paths is not None:
            # Replace every value with a changedpath copy before the
            # mapping is pickled.
            for k, v in orig_paths.iteritems():
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException, (inst, num):
        # Report the error number to the reader instead of an entry.
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        # Normal end of stream marker.
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both ends of the pipe carry binary data: encoded arguments on
    # stdin, pickled log entries on stdout.
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
class logstream(object):
    """Interruptible revision log iterator.

    Reads pickled records from the file-like object given to the
    constructor. A record that unpacks into five fields is yielded as is,
    a None record ends the iteration, and any other record is reported
    through SubversionException with the record as second argument.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                # The stream ended without the terminating record.
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Only unpacking failures are expected here (None or an
                # error number raise TypeError, a tuple of the wrong
                # size raises ValueError); anything else must propagate.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls are no-ops."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
137 # Check to see if the given path is a local Subversion repo. Verify this by
138 # looking for several svn-specific files and directories in the given
139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains the entries of a local svn repository.

    The directory must contain 'locks', 'hooks', 'format' and 'db'.
    ui and proto are accepted for signature compatibility with the other
    checkers and are not used.
    """
    missing = [name for name in ('locks', 'hooks', 'format', 'db')
               if not os.path.exists(os.path.join(path, name))]
    return not missing
146 # Check to see if a given path is the root of an svn repo over http. We verify
147 # this by requesting a version-controlled URL we know can't exist and looking
148 # for the svn-specific "not found" XML.
149 def httpcheck(ui, path, proto):
150 try:
151 opener = urllib2.build_opener()
152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 data = rsp.read()
154 except urllib2.HTTPError, inst:
155 if inst.code != 404:
156 # Except for 404 we cannot know for sure this is not an svn repo
157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 'be a subversion repository. Use --source-type if you '
159 'know better.\n'))
160 return True
161 data = inst.fp.read()
162 except:
163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 return False
165 return '<m:human-readable errcode="160013">' in data
# Maps a URL scheme to the function probing whether a location using
# that scheme is a Subversion repository. Every checker is called as
# check(ui, path, proto).
protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    """Return True if url, or one of its parents, is an svn repository.

    url is split into a scheme and a path; a value without '://' is
    treated as a local path. The checker registered in protomap for the
    scheme is then tried on the path and on each of its parent paths
    until one succeeds. Schemes without a registered checker always
    yield False.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    # Checkers are invoked with three arguments (ui, path, proto), so the
    # fallback for unknown schemes must accept them all instead of
    # failing with a TypeError.
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
188 # SVN conversion code stolen from bzr-svn and tailor
189 #
190 # Subversion looks like a versioned filesystem, branches structures
191 # are defined by conventions and not enforced by the tool. First,
192 # we define the potential branches (modules) as "trunk" and "branches"
193 # children directories. Revisions are then identified by their
194 # module and revision number (and a repository identifier).
195 #
196 # The revision graph is really a tree (or a forest). By default, a
197 # revision parent is the previous revision in the same module. If the
198 # module directory is copied/moved from another module then the
199 # revision is the module root and its parent the source revision in
200 # the parent module. A revision has at most one parent.
201 #
202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open url as a Subversion conversion source.

    url may carry a trailing '@rev' selecting the revision to start from;
    an explicit rev argument takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository or
    cannot be opened, MissingTool when the Subversion bindings are
    missing or older than 1.4, and util.Abort when rev or the
    convert.svn.startrev setting is not an integer or when no revision is
    found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only bound when the guarded import of the bindings at
    # the top of the file succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a revision number: keep url whole.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one; converted() records the
    # revision mapping inside it.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
def setrevmap(self, revmap):
    """Record, for each module in revmap, its highest revision number.

    The result is stored in self.lastrevs as a module -> revnum mapping.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
def exists(self, path, optrev):
    """Return True if path can be listed under self.url at optrev."""
    target = '%s/%s' % (self.url.rstrip('/'), urllib.quote(path))
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
def getheads(self):
    """Return the list of head revision ids to convert.

    The head of the module comes first, followed by one head per
    non-empty directory found in the branches directory. The trunk, tags
    and branches locations come from the convert.svn.* settings, falling
    back to directories of the same name. As side effects, self.module is
    extended with the trunk path, and self.tags, self.head and self.heads
    are set.

    Raises util.Abort when a configured location does not exist, when the
    module has no revision, or when a start revision is combined with
    more than one head or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the repository-relative location configured for name,
        # or None when it is disabled (blank setting) or does not exist.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured location is required to exist.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
def getfile(self, file, rev):
    """Return the content of file at rev and cache its mode for getmode."""
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError if not cached."""
    key = (file, rev)
    return self.modecache[key]
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs and copies maps copy
    destinations to their sources. A result stored by getchangedfiles
    for the same rev is returned directly. The entry of rev in
    self.paths is consumed by this call.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    (paths, parents) = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = []
        for name, entry in listing.iteritems():
            if entry.kind == svn.core.svn_node_file:
                files.append(name)
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full result of getchanges is kept in self._changescache so that
    the next getchanges call for the same rev can reuse it.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    return [name for name, filerev in changes[0]]
def getcommit(self, rev):
    """Return the commit object of rev, fetching its log if necessary.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
def gettags(self):
    """Return a dict mapping tag names to the revision ids they tag.

    An empty dict is returned when no tags directory is known
    (self.tags is None). A SubversionException raised while reading the
    log is reported with a note and the tags collected so far are
    returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.

    # Each pending item is a [source, sourcerev, dest] list.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag was itself copied from dest:
                        # follow it back to the source of this copy.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    # Copied from within the tags directory: keep
                    # following it in older revisions.
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
def converted(self, rev, destrev):
    """Append 'destrev revnum' to .svn/hg-shamap of the working copy.

    Nothing is done when the source is not a working copy. The file is
    opened in append mode on first use and kept open afterwards.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(shamap, 'a')
    fp = self.convertfp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
def revid(self, revnum, module=None):
    """Build the revision id 'svn:<uuid><module>@<revnum>'.

    self.module is used when module is None or empty.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
def revnum(self, rev):
    """Return the integer following the last '@' of the revision id rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    The id has the form 'svn:<uuid>[/<module path>]@<revnum>'. The
    returned module is '' when the id has no path, and starts with a
    slash otherwise.
    """
    location, number = rev.rsplit('@', 1)
    number = int(number)
    slash = location.find('/')
    if slash < 0:
        # No module part; drop the 4-character 'svn:' prefix.
        return location[4:], '', number
    return location[4:slash], location[slash:], number
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is 0 the latest repository revision is used. Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Always restore the previous parent: callers catch the
            # resulting SvnPathNotFound and keep using the transport,
            # which must not stay anchored at the repository root.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path, or one of its parents, was copied in this
                # revision: continue with its name before the copy.
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    previous = self.prevmodule
    if previous == module:
        # Already there, nothing to do.
        return module
    svnurl = self.baseurl + urllib.quote(module)
    if previous is None:
        previous = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into files and copies.

    paths is a sequence of (path, entry) pairs and parents the list of
    parent revision ids. Returns (files, copies): files is a list of
    unique module-relative file names touched by rev, with changed
    directories expanded to the files they contain, and copies maps copy
    destinations to their sources. The transport is reparented to the
    module of rev when it differs from self.module.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            self.ui.debug("entry %s\n" % parentpath)

            # We could avoid the reparent calls if the module has
            # not changed but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, parentpath.strip('/'), prevnum)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory was removed: report every child it had in
                # the parent revision.
                if ent.action == 'C':
                    children = self._find_children(path, prevnum)
                else:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, prevnum)
                    # NOTE(review): str.replace substitutes every
                    # occurrence of oroot, not only the leading one -
                    # confirm this is intended.
                    children = [s.replace(oroot,nroot) for s in children]

                for child in children:
                    childpath = self.getrelpath("/" + child, pmodule)
                    if not childpath:
                        continue
                    if childpath in copies:
                        del copies[childpath]
                    entries.append(childpath)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                # Map each child of the source directory to its new
                # location below the copy destination.
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    # set() removes files reported more than once.
    return (list(set(entries)), copies)
def _fetch_revisions(self, from_revnum, to_revnum):
    """Fetch the log of self.module and populate the commit caches.

    The log is read from the higher of the two revision numbers down to
    the lower one. Every parsed revision is stored in self.commits and
    self.paths, and revisions are chained to each other through their
    parents lists. Reading stops at a branch root or below
    self.startrev.

    Raises util.Abort when the branch has no revision to_revnum.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Most recently parsed commit; its parent is the next revision read.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, so the last match is the longest prefix
            # of the module.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last component of the
            # module; 'trunk' maps to the empty branch name.
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
def _getfile(self, file, rev):
    """Return (data, mode) for file at revision id rev.

    mode is 'l' when the svn:special property is set, else 'x' when
    svn:executable is set, else ''. For mode 'l' a leading "link " is
    stripped from the data. The transport is reparented to the module of
    rev when it differs from self.module.

    Raises IOError when Subversion reports the file as not found; other
    Subversion errors are propagated.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    mode = ''
    try:
        new_module, revnum = self.revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        io = StringIO()
        info = svn.ra.get_file(self.ra, file, revnum, io)
        data = io.getvalue()
        # ra.get_files() seems to keep a reference on the input buffer
        # preventing collection. Release it explicitly.
        io.close()
        # Some results are lists; the properties are their last item.
        if isinstance(info, list):
            info = info[-1]
        mode = ("svn:executable" in info) and 'x' or ''
        mode = ("svn:special" in info) and 'l' or mode
    except SubversionException, e:
        notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                    svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if e.apr_err in notfound: # File not found
            raise IOError()
        raise
    if mode == 'l':
        link_prefix = "link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix):]
    return data, mode
def _find_children(self, path, revnum):
    """Return the recursive listing of path at revnum.

    Every returned name is prefixed with path stripped of its
    surrounding slashes.
    """
    stripped = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.baseurl, urllib.quote(stripped)]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (stripped, name) for name in listing.keys()]
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it is outside of it.

    module defaults to self.module. The module itself maps to ''.
    """
    root = module
    if root is None:
        root = self.module
    # Given the repository url of this wc, say
    # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(root):
        tail = path.rstrip('/')[len(root):]
        # A tail that does not start at a path separator belongs to a
        # sibling which merely shares the prefix.
        if tail == '' or tail.startswith('/'):
            return tail[1:]

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, root))
    return None
def _checkpath(self, path, revnum):
    """Return the node kind of path at revnum, as given by check_path."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Return a logstream over the log of paths between start and end.

    The log is produced by a child 'hg debugsvnlog' process which
    receives the encoded arguments on its standard input.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = []
    for candidate in paths:
        if candidate.startswith('/'):
            absolute = candidate
        else:
            absolute = self.module + '/' + candidate
        relpaths.append(absolute.strip('/'))
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
# Shell script written by svn_sink to hooks/pre-revprop-change of the
# repositories it creates. It accepts modifications of svn:log and the
# addition of hg:convert-branch and hg:convert-rev, and rejects every
# other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
938 class svn_sink(converter_sink, commandline):
939 commit_re = re.compile(r'Committed revision (\d+).', re.M)
def prerun(self):
    """Change into the working copy, if one is set."""
    if not self.wc:
        return
    os.chdir(self.wc)
def postrun(self):
    """Change back to the directory saved in self.cwd, if a working
    copy is set."""
    if not self.wc:
        return
    os.chdir(self.cwd)
def join(self, name):
    """Return the path of name inside the .svn directory of the
    working copy."""
    parts = (self.wc, '.svn', name)
    return os.path.join(*parts)
def revmapfile(self):
    """Return the path of the 'hg-shamap' file of the working copy."""
    mapname = 'hg-shamap'
    return self.join(mapname)
def authorfile(self):
    """Return the path of the 'hg-authormap' file of the working copy."""
    mapname = 'hg-authormap'
    return self.join(mapname)
def __init__(self, ui, path):
    """Prepare a Subversion working copy to commit converted revisions.

    When path is already a working copy it is updated and used as is.
    Otherwise a working copy named '<basename of path>-wc' is checked
    out in the current directory; if the parent directory of path
    exists, path is treated as a local repository, which is created
    first when it does not exist yet.
    """
    converter_sink.__init__(self, ui, path)
    commandline.__init__(self, ui, 'svn')
    self.delete = []
    self.setexec = []
    self.delexec = []
    self.copies = []
    self.wc = None
    self.cwd = os.getcwd()

    path = os.path.realpath(path)

    # Set to the repository path when the repository is created here.
    created = False
    if os.path.isfile(os.path.join(path, '.svn', 'entries')):
        self.wc = path
        self.run0('update')
    else:
        wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

        if os.path.isdir(os.path.dirname(path)):
            if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                ui.status(_('initializing svn repo %r\n') %
                          os.path.basename(path))
                commandline(ui, 'svnadmin').run0('create', path)
                created = path
            # Turn the local path into a file:// URL.
            path = util.normpath(path)
            if not path.startswith('/'):
                path = '/' + path
            path = 'file://' + path

        ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
        self.run0('checkout', path, wcpath)

        self.wc = wcpath
    self.opener = util.opener(self.wc)
    self.wopener = util.opener(self.wc)
    self.childmap = mapfile(ui, self.join('hg-childmap'))
    # None when the working copy filesystem has no usable exec bit.
    self.is_exec = util.checkexec(self.wc) and util.is_exec or None

    if created:
        # Install the hook allowing the revision property changes
        # listed in pre_revprop_change.
        hook = os.path.join(created, 'hooks', 'pre-revprop-change')
        fp = open(hook, 'w')
        fp.write(pre_revprop_change)
        fp.close()
        util.set_flags(hook, False, True)

    xport = transport.SvnRaTransport(url=geturl(path))
    self.uuid = svn.ra.get_uuid(xport.ra)
1007 def wjoin(self, *names):
1008 return os.path.join(self.wc, *names)
1010 def putfile(self, filename, flags, data):
1011 if 'l' in flags:
1012 self.wopener.symlink(data, filename)
1013 else:
1014 try:
1015 if os.path.islink(self.wjoin(filename)):
1016 os.unlink(filename)
1017 except OSError:
1018 pass
1019 self.wopener(filename, 'w').write(data)
1021 if self.is_exec:
1022 was_exec = self.is_exec(self.wjoin(filename))
1023 else:
1024 # On filesystems not supporting execute-bit, there is no way
1025 # to know if it is set but asking subversion. Setting it
1026 # systematically is just as expensive and much simpler.
1027 was_exec = 'x' not in flags
1029 util.set_flags(self.wjoin(filename), False, 'x' in flags)
1030 if was_exec:
1031 if 'x' not in flags:
1032 self.delexec.append(filename)
1033 else:
1034 if 'x' in flags:
1035 self.setexec.append(filename)
1037 def _copyfile(self, source, dest):
1038 # SVN's copy command pukes if the destination file exists, but
1039 # our copyfile method expects to record a copy that has
1040 # already occurred. Cross the semantic gap.
1041 wdest = self.wjoin(dest)
1042 exists = os.path.exists(wdest)
1043 if exists:
1044 fd, tempname = tempfile.mkstemp(
1045 prefix='hg-copy-', dir=os.path.dirname(wdest))
1046 os.close(fd)
1047 os.unlink(tempname)
1048 os.rename(wdest, tempname)
1049 try:
1050 self.run0('copy', source, dest)
1051 finally:
1052 if exists:
1053 try:
1054 os.unlink(wdest)
1055 except OSError:
1056 pass
1057 os.rename(tempname, wdest)
1059 def dirs_of(self, files):
1060 dirs = set()
1061 for f in files:
1062 if os.path.isdir(self.wjoin(f)):
1063 dirs.add(f)
1064 for i in strutil.rfindall(f, '/'):
1065 dirs.add(f[:i])
1066 return dirs
1068 def add_dirs(self, files):
1069 add_dirs = [d for d in sorted(self.dirs_of(files))
1070 if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
1071 if add_dirs:
1072 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
1073 return add_dirs
1075 def add_files(self, files):
1076 if files:
1077 self.xargs(files, 'add', quiet=True)
1078 return files
1080 def tidy_dirs(self, names):
1081 deleted = []
1082 for d in sorted(self.dirs_of(names), reverse=True):
1083 wd = self.wjoin(d)
1084 if os.listdir(wd) == '.svn':
1085 self.run0('delete', d)
1086 deleted.append(d)
1087 return deleted
1089 def addchild(self, parent, child):
1090 self.childmap[parent] = child
1092 def revid(self, rev):
1093 return u"svn:%s@%s" % (self.uuid, rev)
1095 def putcommit(self, files, copies, parents, commit, source, revmap):
1096 # Apply changes to working copy
1097 for f, v in files:
1098 try:
1099 data = source.getfile(f, v)
1100 except IOError:
1101 self.delete.append(f)
1102 else:
1103 e = source.getmode(f, v)
1104 self.putfile(f, e, data)
1105 if f in copies:
1106 self.copies.append([copies[f], f])
1107 files = [f[0] for f in files]
1109 for parent in parents:
1110 try:
1111 return self.revid(self.childmap[parent])
1112 except KeyError:
1113 pass
1114 entries = set(self.delete)
1115 files = frozenset(files)
1116 entries.update(self.add_dirs(files.difference(entries)))
1117 if self.copies:
1118 for s, d in self.copies:
1119 self._copyfile(s, d)
1120 self.copies = []
1121 if self.delete:
1122 self.xargs(self.delete, 'delete')
1123 self.delete = []
1124 entries.update(self.add_files(files.difference(entries)))
1125 entries.update(self.tidy_dirs(entries))
1126 if self.delexec:
1127 self.xargs(self.delexec, 'propdel', 'svn:executable')
1128 self.delexec = []
1129 if self.setexec:
1130 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1131 self.setexec = []
1133 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1134 fp = os.fdopen(fd, 'w')
1135 fp.write(commit.desc)
1136 fp.close()
1137 try:
1138 output = self.run0('commit',
1139 username=util.shortuser(commit.author),
1140 file=messagefile,
1141 encoding='utf-8')
1142 try:
1143 rev = self.commit_re.search(output).group(1)
1144 except AttributeError:
1145 if not files:
1146 return parents[0]
1147 self.ui.warn(_('unexpected svn output:\n'))
1148 self.ui.warn(output)
1149 raise util.Abort(_('unable to cope with svn output'))
1150 if commit.rev:
1151 self.run('propset', 'hg:convert-rev', commit.rev,
1152 revprop=True, revision=rev)
1153 if commit.branch and commit.branch != 'default':
1154 self.run('propset', 'hg:convert-branch', commit.branch,
1155 revprop=True, revision=rev)
1156 for parent in parents:
1157 self.addchild(parent, rev)
1158 return self.revid(rev)
1159 finally:
1160 os.unlink(messagefile)
1162 def puttags(self, tags):
1163 self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
