Comments (15)
// Reduce every opening tag to its name plus a quoted href and/or quoted src attribute.
// The lazy scan and the capture sit together inside ONE optional group: a lookahead is never
// re-entered once it has succeeded, so with the former `[^>]*?(…)?` shape the capture was only
// attempted right after the tag name and href/src were dropped unless they came first.
// Capture numbering is unchanged: $1 tag name, $2 href attribute, $4 src attribute.
html.replace(/<([\w-]+)(?=\s)(?=(?:[^>]*?(\shref=(['"]).*?\3))?)(?=(?:[^>]*?(\ssrc=(['"]).*?\5))?)[^>]*/g, '<$1$2$4')
from technology-blog.
贡献一个测试用例:
<div value="src='AAA AAA'" src='BBBBBB' href=abcabc controls>
value="src='AAA AAA'" src='BBBBBB' href=abcabc
</div>
双引号里带空格、单引号的
单引号里带空格、双引号的
没有引号的
不属于 attribute 的
没有值的 attribute
from technology-blog.
撸了一个,不知道对第七条用例算不算失败的
增强版:处理了第七条用例中同时有 href 和 src 的情况。如果出现多个 href,我在想是否就不应该在正则这个层面处理了。
// Sample HTML strings that run() iterates over; each one is fed to String.prototype.replace.
// They cover: no href/src at all, unquoted and quoted values, href/src before, after and between
// other attributes, href and src together, attributes spread over several lines, quoted values
// that themselves contain quotes and spaces, and self-closing tags.
const list = [
'<a test=adsf>asdf</a>asdfasdf<p>adf</p>',
'<a href=adsf>asdf</a>asdfasdf<p>adf</p>',
'<a href="adsf">asdf</a>asdfasdf<p>adf</p>',
'<a href="adsf">asdf</a>asdfasdf<p>adf</p>',
'<a aa="asdfads" href="adsf">asdf</a>asdfasdf<p>adf</p>',
'<a href="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
'<a aa="asdfads" href="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
'<a aa="asdfads" href="adsf" aa="asdfads" src="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
`
<a aa="asdfads"
href="adsf"
aa="asdfads">asdf</a>asdfasdf<p>adf</p>
`,
`
<div value="src='AAA AAA'" src='BBBBBB' href=abcabc>
value="src='AAA AAA'" src='BBBBBB' href=abcabc
</div>`,
'<script src="adsf"/>asdf<link href="adsf" />asdf<p>adf</p>',
];
// Candidate { search, replace } pairs; run() passes both members to String.prototype.replace.
const regs = {
// First attempt: group 1 is "<tagname", group 2 is one href/src attribute including its leading
// whitespace; the match stops just before ">" and is replaced by "$1$2".
// NOTE(review): the value is matched with [^\s>]*, so a quoted value containing whitespace
// cannot be captured by this pattern.
mine: {
search: /(<\w+)[^>]*(\s+\b(href|src)=("|'|)[^\s>]*\4)(?:\s)?[^>]*?(?=>)/g,
replace: '$1$2',
},
// Second attempt: starts right after "<tagname" (lookbehind), captures up to two href/src
// attributes in groups 2 and 6, consumes the rest of the tag up to an optional "/" and ">",
// and replaces the whole attribute section with "$2$6".
enhanced: {
search: /(?<=<\w+\b)([^>]*?(\s+\b(href|src)=("|'|)[^\s>]*\4))?([^>]*?(\s+\b(href|src)=("|'|)[^\s>]*\8))?[^>]*?(?=\/?>)/g,
replace: '$2$6',
}
};
/**
 * Print what one search/replace pair does to the sample strings in `list`.
 *
 * @param {{search: RegExp, replace: (string|Function)}} reg - pattern and replacement that are
 *   handed to String.prototype.replace.
 * @param {number} [testIndex=-1] - a negative value processes every sample; otherwise only the
 *   sample at exactly this index is processed and the raw `exec` match is printed as well.
 */
function run(reg, testIndex = -1) {
  const showEverything = testIndex < 0;
  for (const [position, sample] of list.entries()) {
    if (!showEverything && testIndex !== position) {
      continue;
    }
    console.log('▶', position, sample);
    if (!showEverything) {
      // Single-sample mode also dumps the first match for inspection.
      console.log(reg.search.exec(sample));
    }
    console.log('result:', sample.replace(reg.search, reg.replace), '\n');
  }
}
// Apply the "enhanced" pattern to every sample (a negative index selects all of them).
run(regs.enhanced, -1);
from technology-blog.
// Modelled on the attribute regex in the Vue source; it does look rather long.
// The outer pattern splits each opening tag into: s1 = tag name (plus trailing whitespace),
// s2 = the whole run of attributes, s3 = optional "/" and whitespace before ">".
'<div class="xxxx" href="xxxddx" >sdfsdf</div><span name="xxsdf" src="sdfsdf"></span>'.
replace(/<([a-zA-Z_][\w\-\.]*\s*)((?:\s*(?:[^\s"'<>\/=]+)(?:\s*(?:=)\s*(?:"(?:[^"]*)"+|'(?:[^']*)'+|(?:[^\s"'=<>`]+)))?)*)(\s*(?:\/)?\s*)>/g, (s, s1, s2, s3) => {
  // Tokenise s2 into whole attributes first, then filter by NAME. Searching s2 directly for
  // "href|src" also hit those letters inside other names or values (data-src="x", class="src").
  const attributes = (s2 || '').match(/\s*[^\s"'<>\/=]+(?:\s*=\s*(?:"[^"]*"+|'[^']*'+|[^\s"'=<>`]+))?/g) || [];
  const kept = attributes.filter((attribute) => /^\s*(?:href|src)\s*(?:=|$)/.test(attribute));
  return '<' + s1 + kept.join('') + s3 + '>';
}); // only href remains on the <div> and only src on the <span>
from technology-blog.
// Criticism welcome.
;(() => {
  // Every binding is declared with const: the bare assignments used here before create implicit
  // globals in sloppy mode and throw a ReferenceError in strict mode / ES modules.
  const str = `<a afa href="http://fanyi-pro.baidu.com/?hmsr=%E7%99%BE%E5%BA%A6%E7%BF%BB%E8%AF%91&hmpl=%E5%9B%BA%E5%AE%9A%E5%85%A5%E5%8F%A3&hmcu=%E9%A1%B6%E9%83%A8%E6%8C%89%E9%92%AE&hmkw=&hmci=" target="_blank" class="list-name" src="" f="" aefa>人工翻译</a>
<div id="search-box" class="search-box-new line">
<ul class="channel grid">
<li><a log="sc_pos:c_baidu" data-type='baidu' rel="nofollow" href="http://www.baidu.com/s?cl=3&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD">网页</a></li>
<li><a log="sc_pos:c_news" data-type='news' rel="nofollow" href="https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=zhidao">资讯</a></li>
<li><a log="sc_pos:c_video" data-type='video' rel="nofollow" href="https://www.baidu.com/sf/vsearch?pd=video&tn=vsearch&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&rsv_spt=16">视频</a></li>
<li><a log="sc_pos:c_pic" data-type='image' rel="nofollow" href="http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=3&ie=gbk">图片</a></li>
<li><strong>知道</strong></li>
<li><a log="sc_pos:c_doc" data-type='wenku' rel="nofollow" href="http://wenku.baidu.com/search?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&lm=0&od=0">文库</a></li>
<li><a log="sc_pos:c_tieba" data-type='tieba' rel="nofollow" href="http://tieba.baidu.com/f?kw=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=4">贴吧</a></li><li><a log="sc_pos:c_b2b" data-type='b2b' rel="nofollow" href="https://b2b.baidu.com/s?q=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=www">采购</a></li>
<li><a log="sc_pos:c_map" data-type='map' rel="nofollow" href="http://map.baidu.com/m?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=map007">地图</a></li><li><a log="sc_pos:c_more" data-type="more" href="http://www.baidu.com/more/">更多»</a></li>
</ul>
<div class="search-block clearfix">
<div class="search-cont clearfix">
<a class="logo" href="/" title="百度知道"></a>
<form action="/search" name="search-form" method="get" id="search-form-new" class="search-form">
<input class="hdi" id="kw" maxlength="256" tabindex="1" size="46" name="word" value="chrome书签本地文件在什么地方" autocomplete="off" placeholder="" />
<button alog-action="g-search-anwser" type="submit" id="search-btn" hidefocus="true" tabindex="2" class="btn-global">搜索答案</button>
<a href="#" alog-action="g-i-ask" class="i-ask-link" id="ask-btn-new">我要提问</a>
</form>
</div>
</div>
</div>
`
  // One attribute whose name does not START with href/src: whitespace, the name, then a quoted
  // non-empty value, an empty quoted value, or a bare run of characters (with or without "=").
  // NOTE(review): (?!href|src) is a prefix test, so names such as hreflang or srcset are kept too,
  // and the [A-z] range also spans the punctuation between "Z" and "a" — confirm both are intended.
  const attrPattern = /(?:\s((?!href|src)[a-z-]+)(="[^"<>]+"|='[^'<>]+'|=""|=''|(=|)[A-z0-9]+))/igm
  // One opening or self-closing tag together with its attributes.
  const tagPattern = /(<[A-z]+[^<>]+(\/>|>))/igm
  // Attributes are only removed inside matched tags, so text between tags is left untouched.
  const stripped = str.replace(tagPattern, (tag) => tag.replace(attrPattern, ''))
  console.log(stripped)
})()
//结果正确
//<a href="http://fanyi-pro.baidu.com/?hmsr=%E7%99%BE%E5%BA%A6%E7%BF%BB%E8%AF%91&hmpl=%E5%9B%BA%E5%AE%9A%E5%85%A5%E5%8F%A3&hmcu=%E9%A1%B6%E9%83%A8%E6%8C%89%E9%92%AE&hmkw=&hmci=" src="">人工翻译</a>
<div>
<ul>
<li><a href="http://www.baidu.com/s?cl=3&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD">网页</a></li>
<li><a href="https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=zhidao">资讯</a></li>
<li><a href="https://www.baidu.com/sf/vsearch?pd=video&tn=vsearch&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&rsv_spt=16">视频</a></li>
<li><a href="http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=3&ie=gbk">图片</a></li>
<li><strong>知道</strong></li>
<li><a href="http://wenku.baidu.com/search?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&lm=0&od=0">文库</a></li>
<li><a href="http://tieba.baidu.com/f?kw=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=4">贴吧</a></li><li><a href="https://b2b.baidu.com/s?q=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=www">采购</a></li>
<li><a href="http://map.baidu.com/m?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=map007">地图</a></li><li><a href="http://www.baidu.com/more/">更多»</a></li>
</ul>
<div>
<div>
<a href="/"></a>
<form>
<input />
<button >搜索答案</button>
<a href="#">我要提问</a>
</form>
</div>
</div>
</div>
from technology-blog.
匹配标签的属性内容:<([\w-]+)(.*?)>
匹配对应每项属性:\s?([\w-]+)=['"]?[\w-\s]+['"]?
// Demo markup covering quoted, unquoted, URL-valued and value-less attributes.
const html = `<div title="test">
<icon v-if="icon" name="info" class="m-message--icon" slot="icon" />
<img src="http://img.123.jpg" title="img"/>
<input checked=true data-value="12" checked/>
<a href="#/test" on-click="jump"></a>
<m-alert title='Alert message'>alert</m-alert>
</div>`
// Attribute names (lower-case) that survive the clean-up.
const whiteList = [ 'href', 'src' ]
// String.prototype.replace returns a new string and leaves `html` untouched, so the cleaned
// markup is kept in its own binding instead of being discarded.
const strippedHtml = html.replace(/<([\w-]+)(.*?)>/g, function (ori, tag, content) {
  console.log(1, arguments)
  // One attribute = whitespace, a name, then optionally "=" and a double-quoted, single-quoted
  // or unquoted value. Compared with the earlier [\w-\s]+ value class this also covers values
  // containing ":", "/", "#" or "." and attributes that have no value at all. Requiring leading
  // whitespace keeps the tag name and a self-closing "/" out of the match.
  return ori.replace(/\s+([^\s"'<>\/=]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?/g, function (attr, name) {
    console.log(2, arguments)
    // HTML attribute names are case-insensitive, so compare in lower case.
    return whiteList.includes(name.toLowerCase()) ? attr : ''
  })
})
以上例子还是存在一些属性无法去除,比如没有值的属性
from technology-blog.
正则写不来,但是好像可以直接js获取元素来实现,哈哈哈哈
from technology-blog.
// Matches one attribute other than src/href: whitespace, a name made of word characters or "-",
// "=", then a run consisting only of word characters and quote marks (no flags, so first match only).
/\s(?!src=|href=)[\w-]*=['\w"]+/
还是要限制一下尖括号吧,不然正文里的也匹配上了
from technology-blog.
来个boss级的终极版,参考引用于vue源码
// Matches one attribute (name plus optional value) unless the name is followed by "=" and is href/src.
// The quoted alternatives need "*": without it only one-character quoted values matched, and for
// longer values just the attribute name was consumed, leaving ="value" behind.
/\s(?!href=|src=)[^\s"'<>\/=]+(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/g
from technology-blog.
我目前未解决的问题是单双引号嵌套如何确保引号正确
from technology-blog.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- <script js="" src="/respond.min.js"></script> -->
<!--[if lt IE 9]>
<script src="/html5shiv.min.js"></script>
<![endif]-->
<title>Document</title>
</head>
<body>
<textarea name="" id="test" cols="110" rows="110"></textarea>
<script type="text/javascript">
var a = "<div//>< hef='s' a><a/>";
var b = "<script ja><\/script><a/>";
var b = true || false;
</script>
<script>
// Rebuild the page's own markup with quoted attributes other than src/href removed.
// `html` accumulates the output; `breakDeep` counts script start tags seen minus script end tags seen.
var html = "";
var breakDeep = 0;
// Split the innerHTML of the first <html> element into tokens: a tag, a single character that is
// not "<", "|", ">" or a word character, a word, or a run of whitespace / "|" / ">".
document.getElementsByTagName("html")[0].innerHTML.match(/<[^>]+>|[^<|>|\w]|\w+\b|[\s\|\>]+/mg).forEach(function(item, index){
// script tags should be avoided where possible; this check is not rigorous
if(/<script/.test(item)){
// A script start tag is emitted here (attributes stripped) and opens a protected region.
html += item.replace(/\s(?!src=|href=)[\w-]*=('|").*?\1/g, '');
breakDeep++;
}else if(/<\/script/.test(item)){
breakDeep--;
}
if(breakDeep > 0){
// Inside a script: copy tokens verbatim; the start tag itself was already appended above.
if(/<script/.test(item) == false){
html += item;
}
}else{
// Outside any script: drop every whitespace-prefixed name=quoted-value pair except src/href.
html += item.replace(/\s(?!src=|href=)[\w-]*=('|").*?\1/g, '');
}
});
// NOTE(review): `test` is not declared in this script; presumably it resolves to the
// <textarea id="test"> through the browser's named-element globals — confirm.
test.value = html;
</script>
</body>
</html>
from technology-blog.
repost
from technology-blog.
正则写不来,但是好像可以直接js获取元素来实现,哈哈哈哈
脱离浏览器你怎么办
from technology-blog.
答案呢?
from technology-blog.
我觉得,首先正则很强大,但不要低估html的容错性(比如非闭合,嵌套错误),而且就算是格式化后的html标签,也有特例,比如
<textarea name="" id="" cols="30" rows="10">
<a value="src='AAA AAA'" src='BBBBBB' href=abcabc wtf=弄啥嘞>
value="src='AAA AAA'" src='BBBBBB' href=abcabc
</textarea>
就连语法高亮都认为textarea
里面是标签呢~
其次正则并不能匹配所有的邮箱,参考https://github.com/kdeldycke/awesome-falsehood#emails
期待答案
from technology-blog.
Related Issues (20)
- 33.如果一个实习生,他本地git的A分支被误删了, A分支代码没有被push到远程,如何找到之前A的提交记录和代码 HOT 4
- 34.如果你用nodejs实现的爬虫服务器的IP被指定网站封了,如何解封? HOT 2
- 35.用nodejs,将base64转化成png文件 HOT 1
- 36.请设计一个方案:有a、b、c三个npm插件,它们会经常更新,在前端项目npm run start启动后,要求a、b、c三个npm插件自动更新到最新版本 HOT 2
- 37.大文件转存问题:服务器A有一个1000G的文件, 需要通过服务端B转发到服务端C,但是服务器B内存只有1个g, 怎么去实现这个大文件转存 HOT 4
- 小蝌蚪传记:PNG图片压缩原理--屌丝的眼泪 HOT 1
- KOA2框架原码解析和实现
- 前端人工智能:通过机器学习推导函数方程式
- webpack工程化打包原理解析与实现
- 小蝌蚪传记:git时光穿梭机--女神的侧颜 HOT 1
- 小蝌蚪传记:端口转发——夜店传说 HOT 4
- 小蝌蚪传记:200行代码实现前端无痕埋点——顶级渣男 HOT 3
- 小蝌蚪传记:nodejs线上模块热部署原理与实现——富婆的爱
- 小蝌蚪日记:通过console.log高仿FBI Warning
- 小蝌蚪传记:前端菜鸟让接口提速60%的优化与原理
- 小蝌蚪系列:面试中软性问题的套路与反套路(上) HOT 4
- 小蝌蚪传记:js、css、html压缩与混淆汇总 --> 变弯记
- 小蝌蚪传记:前端实用技巧,通过babel精准操作js文件——暗恋
- 【开源自荐】推荐一个每日更新的前端面试题库 HOT 3
- 【开源自荐】SolidUI 一句话生成任何图形 HOT 3
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization, use data art
-
Game
Something interesting about games, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from technology-blog.