# Scraper script for otomoto.pl car listings, written with plain string
# splitting instead of an XML/HTML parser.
#
# What the visible code does:
#   * builds N (= 1000) search-result page URLs in `motos` via paste();
#   * opens "otoMoto2.txt" for writing and diverts console output to it
#     with sink(), so every cat() call below writes into that file;
#   * for each page k, downloads the page text with getURL() and cuts it up
#     with successive strsplit() calls;
#   * prints the fields labelled "Marka:", "Model:", "Cena:", "Typ:",
#     "Wyposazenie dodatkowe:" and "Informacje dodatkowe:" with cat().
#
# NOTE(review): this text looks damaged by extraction - confirm against the
# original file before running or refactoring:
#   * the whole script sits on a few physical lines, and the first one
#     starts with `#`, so R treats that entire line as a comment as laid out;
#   * `doc2` and `auta3` are read but never assigned anywhere in the visible
#     text;
#   * there are three more closing braces than opening braces, so some
#     enclosing loop/if headers are presumably missing;
#   * several strsplit() patterns are empty strings ('') or contain only
#     blank lines - presumably HTML tags inside them were stripped.
# NOTE(review): `linki` is set to NULL and not used in the visible code, and
# no sink()/close(otoMoto) call is visible to restore output and release the
# file connection - TODO confirm whether that happens elsewhere.
# parser version without the XML package # author: Maciej Beręsewicz # ## core library(RCurl) ## additional library(XML) library(RJSONIO) library(plyr) library(memisc) N <- 1000 linki<-NULL motos<-paste("http://otomoto.pl/index.php?sect=search&sub=car&page=",1:N,"&qid=2206521429&order_by=i",sep="") otoMoto <- file("otoMoto2.txt","w") sink(otoMoto,append = TRUE) for (k in 1:N){ doc<-getURL(motos[k]) auta<-strsplit(doc,'\">') auta4<-strsplit(doc2,'adminNav') if (is.na(auta4[[1]][2])) { auta5<-strsplit(auta4[[1]][1],'

') auta5<-strsplit(auta5[[1]][2],'

') auta5<-strsplit(auta5[[1]][1]," ") cat("Marka:",auta5[[1]][1],"\nModel:",auta5[[1]][2],"\n" ) } else { auta5<-strsplit(auta4[[1]][2],'') auta6<-strsplit(auta5[[1]],'') cat("Marka:",auta6[[1]][2],"\nModel:",auta6[[2]][2],"\n" ) } auta7<-strsplit(doc2,'>Cena') auta8<-strsplit(auta7[[1]][2],"[:<]") auta9<-strsplit(auta8[[1]],'strong>') cat("Cena:",auta9[[5]][2],auta8[[1]][1],"\n") typ<-strsplit(doc2,"Typ:") typ2<-strsplit(typ[[1]][2],'') typ3<-strsplit(typ2[[1]][2],'') cat("Typ: ", typ3[[1]][1],"\n") for (j in 1:length(auta3[[1]]) ) { if (nchar(auta3[[1]][j])<300) { auta31<-strsplit(auta3[[1]][j],"") auta41<-strsplit(auta31[[1]],"") auta51<-strsplit(auta41[[2]][2],"") cat(auta41[[1]][1],auta51[[1]][1],"\n") } } auto10<-strsplit(doc2,'

Dodatkowe wyposażenie:

\n

') auto11<-strsplit(auto10[[1]][2],'

') cat("Wyposazenie dodatkowe:",auto11[[1]][1],"\n") auto12<-strsplit(doc2,'

Dodatkowe informacje:

\n

') auto13<-strsplit(auto12[[1]][2],'

') cat("Informacje dodatkowe:",auto13[[1]][1],"\n") } } } }