#!/usr/bin/env python
import sys
import os
import urlparse
import urllib
from bs4 import BeautifulSoup
def mac_addr_str(f_data):
    """Record every new MAC-address-shaped token found in *f_data*.

    The text is split on any run of whitespace.  A token counts as a MAC
    address when it is exactly 17 characters long and has ':' at offsets
    2, 5, 8, 11 and 14 (the ``XX:XX:XX:XX:XX:XX`` layout).  The characters
    between the colons are not checked for being hexadecimal digits.

    Each token not already present in the module-level ``mac_list`` is
    appended to that list, written on its own line to the module-level
    file object ``fptr``, and printed to standard output.

    Args:
        f_data: Text to scan (for example the body of a downloaded page).
    """
    colon_offsets = (2, 5, 8, 11, 14)
    # split() with no argument breaks on spaces, tabs and newlines alike, so
    # an address at the end of a line is still isolated as its own token.
    for word in f_data.split():
        if len(word) != 17:
            continue
        if not all(word[i] == ':' for i in colon_offsets):
            continue
        # mac_list and fptr are module-level globals; they are only mutated
        # through their methods here, so no ``global`` statement is needed.
        if word not in mac_list:
            mac_list.append(word)
            fptr.write(word + "\n")
            # Parenthesised single-argument form prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print(word)
# Breadth-first crawl starting at ``url``.  Every fetched page is scanned for
# MAC-address-shaped tokens by mac_addr_str(), which appends new ones to
# ``mac_list`` and to the file ``internet_mac.txt`` in the current working
# directory.  Only links whose absolute URL contains the seed ``url`` are
# followed.
url = "http://stackoverflow.com/questions/tagged/mac-address"
url_list = [url]        # FIFO queue of pages still to fetch
visited = set([url])    # every URL ever queued; a set keeps lookups O(1)
pwd = os.path.join(os.getcwd(), "internet_mac.txt")
mac_list = []

# ``fptr`` stays a module-level name because mac_addr_str() writes through it;
# the ``with`` block guarantees the file is closed even if the crawl is
# interrupted (e.g. Ctrl-C).
with open(pwd, "a") as fptr:
    while url_list:
        current_url = url_list.pop(0)
        try:
            htmltext = urllib.urlopen(current_url).read()
        except Exception as err:
            # Best-effort crawl: report the failure and move on instead of
            # falling through with a missing or stale ``htmltext``.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt stop
            # the script.
            sys.stderr.write("skipping %s: %s\n" % (current_url, err))
            continue

        mac_addr_str(htmltext)

        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries are installed.
        soup = BeautifulSoup(htmltext, "html.parser")
        for tag in soup.findAll('a', href=True):
            # Resolve relative links against the page they were found on.
            link = urlparse.urljoin(current_url, tag['href'])
            if url in link and link not in visited:
                visited.add(link)
                url_list.append(link)