newstring = ''
cont = 0
for each ch in string:
if cont:
if (ch >> 6) != 2: # high 2 bits are 10
# do whatever, e.g. skip it, or skip whole point, or?
else:
# acceptable continuation of multi-octlet char
newstring += ch
cont -= 1
else:
if (ch >> 7): # high bit set?
c = (ch << 1) # strip the high bit marker
while (c & 1): # while the high bit indicates another octlet
c <<= 1
cont += 1
if cont > 4:
# more than 4 octels not allowed; cope with error
if !cont:
# illegal, do something sensible
newstring += ch # or whatever
if cont:
# last utf-8 was not terminated, cope